wchevreuil commented on code in PR #7149: URL: https://github.com/apache/hbase/pull/7149#discussion_r2316232114
########## hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RefreshHFilesTableProcedure.java: ########## @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.procedure; + +import java.io.IOException; +import java.util.List; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RefreshHFilesTableProcedureState; + [email protected] +public class RefreshHFilesTableProcedure + extends AbstractStateMachineTableProcedure<RefreshHFilesTableProcedureState> { + private static final Logger LOG = LoggerFactory.getLogger(RefreshHFilesTableProcedure.class); + + private TableName tableName; + private String namespaceName; + + public RefreshHFilesTableProcedure() { + super(); + } + + public 
RefreshHFilesTableProcedure(MasterProcedureEnv env) { + super(env); + } + + public RefreshHFilesTableProcedure(MasterProcedureEnv env, TableName tableName) { + super(env); + this.tableName = tableName; + } + + public RefreshHFilesTableProcedure(MasterProcedureEnv env, String namespaceName) { + super(env); + this.namespaceName = namespaceName; + } + + @Override + public TableOperationType getTableOperationType() { + return TableOperationType.REFRESH_HFILES; + } + + @Override + protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { + super.serializeStateData(serializer); + MasterProcedureProtos.RefreshHFilesTableProcedureStateData.Builder builder = + MasterProcedureProtos.RefreshHFilesTableProcedureStateData.newBuilder(); + if (tableName != null && namespaceName == null) { + builder.setTableName(ProtobufUtil.toProtoTableName(tableName)); + } else if (tableName == null && namespaceName != null) { + builder.setNamespaceName(namespaceName); + } + serializer.serialize(builder.build()); + } + + @Override + protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { + super.deserializeStateData(serializer); + MasterProcedureProtos.RefreshHFilesTableProcedureStateData data = + serializer.deserialize(MasterProcedureProtos.RefreshHFilesTableProcedureStateData.class); + if (data.hasTableName() && !data.hasNamespaceName()) { + this.tableName = ProtobufUtil.toTableName(data.getTableName()); + } else if (!data.hasTableName() && data.hasNamespaceName()) { + this.namespaceName = data.getNamespaceName(); + } + } + + @Override + public TableName getTableName() { + if (tableName != null && namespaceName == null) { + return tableName; + } + return DUMMY_NAMESPACE_TABLE_NAME; + } + + @Override + protected RefreshHFilesTableProcedureState getInitialState() { + return RefreshHFilesTableProcedureState.REFRESH_HFILES_PREPARE; + } + + @Override + protected int getStateId(RefreshHFilesTableProcedureState state) { + return 
state.getNumber(); + } + + @Override + protected RefreshHFilesTableProcedureState getState(int stateId) { + return RefreshHFilesTableProcedureState.forNumber(stateId); + } + + @Override + protected void rollbackState(MasterProcedureEnv env, RefreshHFilesTableProcedureState state) + throws IOException, InterruptedException { + // Refresh HFiles is idempotent operation hence rollback is not needed + LOG.trace("Rollback not implemented for RefreshHFilesTableProcedure state: {}", state); + } + + @Override + protected Flow executeFromState(MasterProcedureEnv env, RefreshHFilesTableProcedureState state) { + LOG.info("Executing RefreshHFilesTableProcedureState state: {}", state); + + try { + return switch (state) { + case REFRESH_HFILES_PREPARE -> prepare(env); + case REFRESH_HFILES_REFRESH_REGION -> refreshHFiles(env); + case REFRESH_HFILES_FINISH -> finish(); + default -> throw new UnsupportedOperationException("Unhandled state: " + state); + }; + } catch (Exception ex) { + LOG.error("Error in RefreshHFilesTableProcedure state {}", state, ex); + setFailure("RefreshHFilesTableProcedure", ex); + return Flow.NO_MORE_STATE; + } + } + + private Flow prepare(final MasterProcedureEnv env) { + setNextState(RefreshHFilesTableProcedureState.REFRESH_HFILES_REFRESH_REGION); + return Flow.HAS_MORE_STATE; + } + + private void refreshHFilesForTable(final MasterProcedureEnv env, TableName tableName) { + addChildProcedure(env.getAssignmentManager().getTableRegions(tableName, true).stream() + .map(r -> new RefreshHFilesRegionProcedure(r)).toArray(RefreshHFilesRegionProcedure[]::new)); + } + + private Flow refreshHFiles(final MasterProcedureEnv env) throws IOException { + if (tableName != null && namespaceName == null) { + refreshHFilesForTable(env, tableName); + } else if (tableName == null && namespaceName != null) { + final List<TableName> tables = + env.getMasterServices().listTableNamesByNamespace(namespaceName); + for (TableName table : tables) { + refreshHFilesForTable(env, table); 
+ } + } else { + final List<TableName> tables = env.getMasterServices().getTableDescriptors().getAll().values() + .stream().map(TableDescriptor::getTableName).toList(); + for (TableName table : tables) { Review Comment: Can't this be done within the stream above, rather than going through a second loop? ########## hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RefreshHFilesRegionProcedure.java: ########## @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.procedure; + +import java.io.IOException; +import java.util.Optional; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.assignment.RegionStateNode; +import org.apache.hadoop.hbase.master.assignment.RegionStates; +import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException; +import org.apache.hadoop.hbase.procedure2.Procedure; +import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; +import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; +import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; +import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher; +import org.apache.hadoop.hbase.procedure2.RemoteProcedureException; +import org.apache.hadoop.hbase.regionserver.RefreshHFilesCallable; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; + [email protected] +public class RefreshHFilesRegionProcedure extends Procedure<MasterProcedureEnv> + implements TableProcedureInterface, + RemoteProcedureDispatcher.RemoteProcedure<MasterProcedureEnv, ServerName> { + private RegionInfo region; + + public RefreshHFilesRegionProcedure() { + } + + public RefreshHFilesRegionProcedure(RegionInfo region) { + this.region = region; + } + + @Override + protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { + MasterProcedureProtos.RefreshHFilesRegionProcedureStateData data = + serializer.deserialize(MasterProcedureProtos.RefreshHFilesRegionProcedureStateData.class); + this.region = ProtobufUtil.toRegionInfo(data.getRegion()); + // TODO Get the Data from region server + } + + @Override + protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { + 
MasterProcedureProtos.RefreshHFilesRegionProcedureStateData.Builder builder = + MasterProcedureProtos.RefreshHFilesRegionProcedureStateData.newBuilder(); + builder.setRegion(ProtobufUtil.toRegionInfo(region)); + // TODO add data that you want to pass to region server + serializer.serialize(builder.build()); + } + + @Override + protected boolean abort(MasterProcedureEnv env) { + return false; + } + + @Override + protected void rollback(MasterProcedureEnv env) throws IOException, InterruptedException { + throw new UnsupportedOperationException(); + } + + @Override + protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env) + throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException { + RegionStates regionStates = env.getAssignmentManager().getRegionStates(); + RegionStateNode regionNode = regionStates.getRegionStateNode(region); Review Comment: >However if region split happens in active cluster then it will get communicated to read only cluster post execution of refresh_meta command (Also mentioned in design doc). So if active cluster splits a region, and daughters get compacted before refresh_meta is called, we may have the region removed by catalog janitor running in active cluster. In this case, read replica cluster would get FNFE when trying to read the removed region. I guess that's documented? ########## hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RefreshHFilesRegionProcedure.java: ########## @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.procedure; + +import java.io.IOException; +import java.util.Optional; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.RegionState; +import org.apache.hadoop.hbase.master.assignment.RegionStateNode; +import org.apache.hadoop.hbase.master.assignment.RegionStates; +import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException; +import org.apache.hadoop.hbase.procedure2.Procedure; +import org.apache.hadoop.hbase.procedure2.ProcedureEvent; +import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; +import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; +import org.apache.hadoop.hbase.procedure2.ProcedureUtil; +import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; +import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher; +import org.apache.hadoop.hbase.procedure2.RemoteProcedureException; +import org.apache.hadoop.hbase.regionserver.RefreshHFilesCallable; +import org.apache.hadoop.hbase.util.RetryCounter; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; +import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos; + [email protected] +public class RefreshHFilesRegionProcedure extends 
Procedure<MasterProcedureEnv> + implements TableProcedureInterface, + RemoteProcedureDispatcher.RemoteProcedure<MasterProcedureEnv, ServerName> { + private static final Logger LOG = LoggerFactory.getLogger(RefreshHFilesRegionProcedure.class); + private RegionInfo region; + private ProcedureEvent<?> event; + private boolean dispatched; + private boolean succ; + private RetryCounter retryCounter; + + public RefreshHFilesRegionProcedure() { + } + + public RefreshHFilesRegionProcedure(RegionInfo region) { + this.region = region; + } + + @Override + protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { + MasterProcedureProtos.RefreshHFilesRegionProcedureStateData data = + serializer.deserialize(MasterProcedureProtos.RefreshHFilesRegionProcedureStateData.class); + this.region = ProtobufUtil.toRegionInfo(data.getRegion()); + } + + @Override + protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { + MasterProcedureProtos.RefreshHFilesRegionProcedureStateData.Builder builder = + MasterProcedureProtos.RefreshHFilesRegionProcedureStateData.newBuilder(); + builder.setRegion(ProtobufUtil.toRegionInfo(region)); + serializer.serialize(builder.build()); + } + + @Override + protected boolean abort(MasterProcedureEnv env) { + return false; + } + + @Override + protected void rollback(MasterProcedureEnv env) throws IOException, InterruptedException { + throw new UnsupportedOperationException(); + } + + private void setTimeoutForSuspend(MasterProcedureEnv env, String reason) { + if (retryCounter == null) { + retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration()); + } + long backoff = retryCounter.getBackoffTimeAndIncrementAttempts(); + LOG.warn("{} can not run currently because {}, wait {} ms to retry", this, reason, backoff); + setTimeout(Math.toIntExact(backoff)); + setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT); + skipPersistence(); + } + + @Override + protected 
Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env) + throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException { + if (dispatched) { + if (succ) { + return null; + } + dispatched = false; + } + + RegionStates regionStates = env.getAssignmentManager().getRegionStates(); + RegionStateNode regionNode = regionStates.getRegionStateNode(region); + + if (regionNode.getProcedure() != null) { + setTimeoutForSuspend(env, String.format("region %s has a TRSP attached %s", + region.getRegionNameAsString(), regionNode.getProcedure())); + throw new ProcedureSuspendedException(); + } + + if (!regionNode.isInState(RegionState.State.OPEN)) { + LOG.info("State of region {} is not OPEN. Skip {} ...", region, this); Review Comment: nit: better raise log severity to WARN. ########## hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RefreshHFilesRegionProcedure.java: ########## @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.procedure; + +import java.io.IOException; +import java.util.Optional; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.RegionState; +import org.apache.hadoop.hbase.master.assignment.RegionStateNode; +import org.apache.hadoop.hbase.master.assignment.RegionStates; +import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException; +import org.apache.hadoop.hbase.procedure2.Procedure; +import org.apache.hadoop.hbase.procedure2.ProcedureEvent; +import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; +import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; +import org.apache.hadoop.hbase.procedure2.ProcedureUtil; +import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; +import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher; +import org.apache.hadoop.hbase.procedure2.RemoteProcedureException; +import org.apache.hadoop.hbase.regionserver.RefreshHFilesCallable; +import org.apache.hadoop.hbase.util.RetryCounter; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; +import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos; + [email protected] +public class RefreshHFilesRegionProcedure extends Procedure<MasterProcedureEnv> + implements TableProcedureInterface, + RemoteProcedureDispatcher.RemoteProcedure<MasterProcedureEnv, ServerName> { + private static final Logger LOG = LoggerFactory.getLogger(RefreshHFilesRegionProcedure.class); + private RegionInfo region; + private ProcedureEvent<?> event; + private boolean dispatched; + private boolean succ; + private RetryCounter retryCounter; + + public RefreshHFilesRegionProcedure() { + } + 
+ public RefreshHFilesRegionProcedure(RegionInfo region) { + this.region = region; + } + + @Override + protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { + MasterProcedureProtos.RefreshHFilesRegionProcedureStateData data = + serializer.deserialize(MasterProcedureProtos.RefreshHFilesRegionProcedureStateData.class); + this.region = ProtobufUtil.toRegionInfo(data.getRegion()); + } + + @Override + protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { + MasterProcedureProtos.RefreshHFilesRegionProcedureStateData.Builder builder = + MasterProcedureProtos.RefreshHFilesRegionProcedureStateData.newBuilder(); + builder.setRegion(ProtobufUtil.toRegionInfo(region)); + serializer.serialize(builder.build()); + } + + @Override + protected boolean abort(MasterProcedureEnv env) { + return false; + } + + @Override + protected void rollback(MasterProcedureEnv env) throws IOException, InterruptedException { + throw new UnsupportedOperationException(); + } + + private void setTimeoutForSuspend(MasterProcedureEnv env, String reason) { + if (retryCounter == null) { + retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration()); + } + long backoff = retryCounter.getBackoffTimeAndIncrementAttempts(); + LOG.warn("{} can not run currently because {}, wait {} ms to retry", this, reason, backoff); + setTimeout(Math.toIntExact(backoff)); + setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT); + skipPersistence(); + } + + @Override + protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env) + throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException { + if (dispatched) { + if (succ) { + return null; + } + dispatched = false; + } + + RegionStates regionStates = env.getAssignmentManager().getRegionStates(); + RegionStateNode regionNode = regionStates.getRegionStateNode(region); + + if (regionNode.getProcedure() != null) { + setTimeoutForSuspend(env, 
String.format("region %s has a TRSP attached %s", + region.getRegionNameAsString(), regionNode.getProcedure())); + throw new ProcedureSuspendedException(); + } + + if (!regionNode.isInState(RegionState.State.OPEN)) { + LOG.info("State of region {} is not OPEN. Skip {} ...", region, this); + setTimeoutForSuspend(env, String.format("region state of %s is %s", + region.getRegionNameAsString(), regionNode.getState())); + return null; Review Comment: Why not a ProcedureSuspendedException here? We just want to give up on refreshing? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
