http://git-wip-us.apache.org/repos/asf/hbase/blob/e1d5c3d2/hbase-protocol/src/main/protobuf/Master.proto ---------------------------------------------------------------------- diff --git a/hbase-protocol/src/main/protobuf/Master.proto b/hbase-protocol/src/main/protobuf/Master.proto index 79bb862..1f7a3b7 100644 --- a/hbase-protocol/src/main/protobuf/Master.proto +++ b/hbase-protocol/src/main/protobuf/Master.proto @@ -370,9 +370,12 @@ message DeleteSnapshotResponse { message RestoreSnapshotRequest { required SnapshotDescription snapshot = 1; + optional uint64 nonce_group = 2 [default = 0]; + optional uint64 nonce = 3 [default = 0]; } message RestoreSnapshotResponse { + required uint64 proc_id = 1; } /* if you don't send the snapshot, then you will get it back @@ -735,11 +738,6 @@ service MasterService { rpc RestoreSnapshot(RestoreSnapshotRequest) returns(RestoreSnapshotResponse); /** - * Determine if the snapshot restore is done yet. - */ - rpc IsRestoreSnapshotDone(IsRestoreSnapshotDoneRequest) returns(IsRestoreSnapshotDoneResponse); - - /** * Execute a distributed procedure. */ rpc ExecProcedure(ExecProcedureRequest) returns(ExecProcedureResponse);
http://git-wip-us.apache.org/repos/asf/hbase/blob/e1d5c3d2/hbase-protocol/src/main/protobuf/MasterProcedure.proto ---------------------------------------------------------------------- diff --git a/hbase-protocol/src/main/protobuf/MasterProcedure.proto b/hbase-protocol/src/main/protobuf/MasterProcedure.proto index 2d2aff4..87aae6a 100644 --- a/hbase-protocol/src/main/protobuf/MasterProcedure.proto +++ b/hbase-protocol/src/main/protobuf/MasterProcedure.proto @@ -222,6 +222,46 @@ message DisableTableStateData { required bool skip_table_state_check = 3; } +message RestoreParentToChildRegionsPair { + required string parent_region_name = 1; + required string child1_region_name = 2; + required string child2_region_name = 3; +} + +enum CloneSnapshotState { + CLONE_SNAPSHOT_PRE_OPERATION = 1; + CLONE_SNAPSHOT_WRITE_FS_LAYOUT = 2; + CLONE_SNAPSHOT_ADD_TO_META = 3; + CLONE_SNAPSHOT_ASSIGN_REGIONS = 4; + CLONE_SNAPSHOT_UPDATE_DESC_CACHE = 5; + CLONE_SNAPSHOT_POST_OPERATION = 6; +} + +message CloneSnapshotStateData { + required UserInformation user_info = 1; + required SnapshotDescription snapshot = 2; + required TableSchema table_schema = 3; + repeated RegionInfo region_info = 4; + repeated RestoreParentToChildRegionsPair parent_to_child_regions_pair_list = 5; +} + +enum RestoreSnapshotState { + RESTORE_SNAPSHOT_PRE_OPERATION = 1; + RESTORE_SNAPSHOT_UPDATE_TABLE_DESCRIPTOR = 2; + RESTORE_SNAPSHOT_WRITE_FS_LAYOUT = 3; + RESTORE_SNAPSHOT_UPDATE_META = 4; +} + +message RestoreSnapshotStateData { + required UserInformation user_info = 1; + required SnapshotDescription snapshot = 2; + required TableSchema modified_table_schema = 3; + repeated RegionInfo region_info_for_restore = 4; + repeated RegionInfo region_info_for_remove = 5; + repeated RegionInfo region_info_for_add = 6; + repeated RestoreParentToChildRegionsPair parent_to_child_regions_pair_list = 7; +} + message ServerCrashStateData { required ServerName server_name = 1; optional bool distributed_log_replay = 2; 
http://git-wip-us.apache.org/repos/asf/hbase/blob/e1d5c3d2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index cdadff4..319d363 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -916,33 +916,6 @@ public class MasterRpcServices extends RSRpcServices } /** - * Returns the status of the requested snapshot restore/clone operation. - * This method is not exposed to the user, it is just used internally by HBaseAdmin - * to verify if the restore is completed. - * - * No exceptions are thrown if the restore is not running, the result will be "done". - * - * @return done <tt>true</tt> if the restore/clone operation is completed. - * @throws ServiceException if the operation failed. - */ - @Override - public IsRestoreSnapshotDoneResponse isRestoreSnapshotDone(RpcController controller, - IsRestoreSnapshotDoneRequest request) throws ServiceException { - try { - master.checkInitialized(); - SnapshotDescription snapshot = request.getSnapshot(); - IsRestoreSnapshotDoneResponse.Builder builder = IsRestoreSnapshotDoneResponse.newBuilder(); - boolean done = master.snapshotManager.isRestoreDone(snapshot); - builder.setDone(done); - return builder.build(); - } catch (ForeignException e) { - throw new ServiceException(e.getCause()); - } catch (IOException e) { - throw new ServiceException(e); - } - } - - /** * Checks if the specified snapshot is done. 
* @return true if the snapshot is in file system ready to use, * false if the snapshot is in the process of completing @@ -1215,8 +1188,9 @@ public class MasterRpcServices extends RSRpcServices TableName dstTable = TableName.valueOf(request.getSnapshot().getTable()); master.getNamespace(dstTable.getNamespaceAsString()); SnapshotDescription reqSnapshot = request.getSnapshot(); - master.snapshotManager.restoreSnapshot(reqSnapshot); - return RestoreSnapshotResponse.newBuilder().build(); + long procId = master.snapshotManager.restoreOrCloneSnapshot( + reqSnapshot, request.getNonceGroup(), request.getNonce()); + return RestoreSnapshotResponse.newBuilder().setProcId(procId).build(); } catch (ForeignException e) { throw new ServiceException(e.getCause()); } catch (IOException e) { http://git-wip-us.apache.org/repos/asf/hbase/blob/e1d5c3d2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CloneSnapshotProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CloneSnapshotProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CloneSnapshotProcedure.java new file mode 100644 index 0000000..9477177 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CloneSnapshotProcedure.java @@ -0,0 +1,522 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.procedure; + +import java.io.InputStream; +import java.io.OutputStream; +import java.io.IOException; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.MetaTableAccessor; +import org.apache.hadoop.hbase.TableDescriptor; +import org.apache.hadoop.hbase.TableExistsException; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.master.MasterCoprocessorHost; +import org.apache.hadoop.hbase.master.MasterFileSystem; +import org.apache.hadoop.hbase.master.MetricsSnapshot; +import org.apache.hadoop.hbase.master.procedure.CreateTableProcedure.CreateHdfsRegions; +import org.apache.hadoop.hbase.monitoring.MonitoredTask; +import org.apache.hadoop.hbase.monitoring.TaskMonitor; +import org.apache.hadoop.hbase.procedure2.StateMachineProcedure; +import 
org.apache.hadoop.hbase.protobuf.generated.HBaseProtos; +import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos; +import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.CloneSnapshotState; +import org.apache.hadoop.hbase.util.FSTableDescriptors; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException; +import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.SnapshotManifest; +import org.apache.hadoop.security.UserGroupInformation; + +import com.google.common.base.Preconditions; + +@InterfaceAudience.Private +public class CloneSnapshotProcedure + extends StateMachineProcedure<MasterProcedureEnv, CloneSnapshotState> + implements TableProcedureInterface { + private static final Log LOG = LogFactory.getLog(CloneSnapshotProcedure.class); + + private final AtomicBoolean aborted = new AtomicBoolean(false); + + private UserGroupInformation user; + private HTableDescriptor hTableDescriptor; + private SnapshotDescription snapshot; + private List<HRegionInfo> newRegions = null; + private Map<String, Pair<String, String> > parentsToChildrenPairMap = + new HashMap<String, Pair<String, String>>(); + + // Monitor + private MonitoredTask monitorStatus = null; + + private Boolean traceEnabled = null; + + /** + * Constructor (for failover) + */ + public CloneSnapshotProcedure() { + } + + /** + * Constructor + * @param env MasterProcedureEnv + * @param hTableDescriptor the table to operate on + * @param snapshot snapshot to clone from + * @throws IOException + */ + public CloneSnapshotProcedure( + final MasterProcedureEnv env, + final HTableDescriptor hTableDescriptor, + final 
SnapshotDescription snapshot) + throws IOException { + this.hTableDescriptor = hTableDescriptor; + this.snapshot = snapshot; + this.user = env.getRequestUser().getUGI(); + this.setOwner(this.user.getShortUserName()); + + getMonitorStatus(); + } + + /** + * Set up monitor status if it is not created. + */ + private MonitoredTask getMonitorStatus() { + if (monitorStatus == null) { + monitorStatus = TaskMonitor.get().createStatus("Cloning snapshot '" + snapshot.getName() + + "' to table " + getTableName()); + } + return monitorStatus; + } + + @Override + protected Flow executeFromState(final MasterProcedureEnv env, final CloneSnapshotState state) + throws InterruptedException { + if (isTraceEnabled()) { + LOG.trace(this + " execute state=" + state); + } + try { + switch (state) { + case CLONE_SNAPSHOT_PRE_OPERATION: + // Verify if we can clone the table + prepareClone(env); + + preCloneSnapshot(env); + setNextState(CloneSnapshotState.CLONE_SNAPSHOT_WRITE_FS_LAYOUT); + break; + case CLONE_SNAPSHOT_WRITE_FS_LAYOUT: + newRegions = createFilesystemLayout(env, hTableDescriptor, newRegions); + setNextState(CloneSnapshotState.CLONE_SNAPSHOT_ADD_TO_META); + break; + case CLONE_SNAPSHOT_ADD_TO_META: + addRegionsToMeta(env); + setNextState(CloneSnapshotState.CLONE_SNAPSHOT_ASSIGN_REGIONS); + break; + case CLONE_SNAPSHOT_ASSIGN_REGIONS: + CreateTableProcedure.assignRegions(env, getTableName(), newRegions); + setNextState(CloneSnapshotState.CLONE_SNAPSHOT_UPDATE_DESC_CACHE); + break; + case CLONE_SNAPSHOT_UPDATE_DESC_CACHE: + CreateTableProcedure.updateTableDescCache(env, getTableName()); + setNextState(CloneSnapshotState.CLONE_SNAPSHOT_POST_OPERATION); + break; + case CLONE_SNAPSHOT_POST_OPERATION: + postCloneSnapshot(env); + + MetricsSnapshot metricsSnapshot = new MetricsSnapshot(); + metricsSnapshot.addSnapshotClone( + getMonitorStatus().getCompletionTimestamp() - getMonitorStatus().getStartTime()); + getMonitorStatus().markComplete("Clone snapshot '"+ snapshot.getName() +"' 
completed!"); + return Flow.NO_MORE_STATE; + default: + throw new UnsupportedOperationException("unhandled state=" + state); + } + } catch (IOException e) { + LOG.error("Error trying to create table=" + getTableName() + " state=" + state, e); + setFailure("master-create-table", e); + } + return Flow.HAS_MORE_STATE; + } + + @Override + protected void rollbackState(final MasterProcedureEnv env, final CloneSnapshotState state) + throws IOException { + if (isTraceEnabled()) { + LOG.trace(this + " rollback state=" + state); + } + try { + switch (state) { + case CLONE_SNAPSHOT_POST_OPERATION: + // TODO-MAYBE: call the deleteTable coprocessor event? + break; + case CLONE_SNAPSHOT_UPDATE_DESC_CACHE: + DeleteTableProcedure.deleteTableDescriptorCache(env, getTableName()); + break; + case CLONE_SNAPSHOT_ASSIGN_REGIONS: + DeleteTableProcedure.deleteAssignmentState(env, getTableName()); + break; + case CLONE_SNAPSHOT_ADD_TO_META: + DeleteTableProcedure.deleteFromMeta(env, getTableName(), newRegions); + break; + case CLONE_SNAPSHOT_WRITE_FS_LAYOUT: + DeleteTableProcedure.deleteFromFs(env, getTableName(), newRegions, false); + break; + case CLONE_SNAPSHOT_PRE_OPERATION: + DeleteTableProcedure.deleteTableStates(env, getTableName()); + // TODO-MAYBE: call the deleteTable coprocessor event? + break; + default: + throw new UnsupportedOperationException("unhandled state=" + state); + } + } catch (IOException e) { + // This will be retried. Unless there is a bug in the code, + // this should be just a "temporary error" (e.g. 
network down) + LOG.warn("Failed rollback attempt step=" + state + " table=" + getTableName(), e); + throw e; + } + } + + @Override + protected CloneSnapshotState getState(final int stateId) { + return CloneSnapshotState.valueOf(stateId); + } + + @Override + protected int getStateId(final CloneSnapshotState state) { + return state.getNumber(); + } + + @Override + protected CloneSnapshotState getInitialState() { + return CloneSnapshotState.CLONE_SNAPSHOT_PRE_OPERATION; + } + + @Override + protected void setNextState(final CloneSnapshotState state) { + if (aborted.get()) { + setAbortFailure("clone-snapshot", "abort requested"); + } else { + super.setNextState(state); + } + } + + @Override + public TableName getTableName() { + return hTableDescriptor.getTableName(); + } + + @Override + public TableOperationType getTableOperationType() { + return TableOperationType.CREATE; // Clone is creating a table + } + + @Override + public boolean abort(final MasterProcedureEnv env) { + aborted.set(true); + return true; + } + + @Override + public void toStringClassDetails(StringBuilder sb) { + sb.append(getClass().getSimpleName()); + sb.append(" (table="); + sb.append(getTableName()); + sb.append(" snapshot="); + sb.append(snapshot); + sb.append(")"); + } + + @Override + public void serializeStateData(final OutputStream stream) throws IOException { + super.serializeStateData(stream); + + MasterProcedureProtos.CloneSnapshotStateData.Builder cloneSnapshotMsg = + MasterProcedureProtos.CloneSnapshotStateData.newBuilder() + .setUserInfo(MasterProcedureUtil.toProtoUserInfo(this.user)) + .setSnapshot(this.snapshot) + .setTableSchema(hTableDescriptor.convert()); + if (newRegions != null) { + for (HRegionInfo hri: newRegions) { + cloneSnapshotMsg.addRegionInfo(HRegionInfo.convert(hri)); + } + } + if (!parentsToChildrenPairMap.isEmpty()) { + final Iterator<Map.Entry<String, Pair<String, String>>> it = + parentsToChildrenPairMap.entrySet().iterator(); + while (it.hasNext()) { + final 
Map.Entry<String, Pair<String, String>> entry = it.next(); + + MasterProcedureProtos.RestoreParentToChildRegionsPair.Builder parentToChildrenPair = + MasterProcedureProtos.RestoreParentToChildRegionsPair.newBuilder() + .setParentRegionName(entry.getKey()) + .setChild1RegionName(entry.getValue().getFirst()) + .setChild2RegionName(entry.getValue().getSecond()); + cloneSnapshotMsg.addParentToChildRegionsPairList(parentToChildrenPair); + } + } + cloneSnapshotMsg.build().writeDelimitedTo(stream); + } + + @Override + public void deserializeStateData(final InputStream stream) throws IOException { + super.deserializeStateData(stream); + + MasterProcedureProtos.CloneSnapshotStateData cloneSnapshotMsg = + MasterProcedureProtos.CloneSnapshotStateData.parseDelimitedFrom(stream); + user = MasterProcedureUtil.toUserInfo(cloneSnapshotMsg.getUserInfo()); + snapshot = cloneSnapshotMsg.getSnapshot(); + hTableDescriptor = HTableDescriptor.convert(cloneSnapshotMsg.getTableSchema()); + if (cloneSnapshotMsg.getRegionInfoCount() == 0) { + newRegions = null; + } else { + newRegions = new ArrayList<HRegionInfo>(cloneSnapshotMsg.getRegionInfoCount()); + for (HBaseProtos.RegionInfo hri: cloneSnapshotMsg.getRegionInfoList()) { + newRegions.add(HRegionInfo.convert(hri)); + } + } + if (cloneSnapshotMsg.getParentToChildRegionsPairListCount() > 0) { + parentsToChildrenPairMap = new HashMap<String, Pair<String, String>>(); + for (MasterProcedureProtos.RestoreParentToChildRegionsPair parentToChildrenPair: + cloneSnapshotMsg.getParentToChildRegionsPairListList()) { + parentsToChildrenPairMap.put( + parentToChildrenPair.getParentRegionName(), + new Pair<String, String>( + parentToChildrenPair.getChild1RegionName(), + parentToChildrenPair.getChild2RegionName())); + } + } + // Make sure that the monitor status is set up + getMonitorStatus(); + } + + @Override + protected boolean acquireLock(final MasterProcedureEnv env) { + if (env.waitInitialized(this)) { + return false; + } + return 
env.getProcedureQueue().tryAcquireTableExclusiveLock(this, getTableName()); + } + + @Override + protected void releaseLock(final MasterProcedureEnv env) { + env.getProcedureQueue().releaseTableExclusiveLock(this, getTableName()); + } + + /** + * Action before any real action of cloning from snapshot. + * @param env MasterProcedureEnv + * @throws IOException + */ + private void prepareClone(final MasterProcedureEnv env) throws IOException { + final TableName tableName = getTableName(); + if (MetaTableAccessor.tableExists(env.getMasterServices().getConnection(), tableName)) { + throw new TableExistsException(getTableName()); + } + } + + /** + * Action before cloning from snapshot. + * @param env MasterProcedureEnv + * @throws IOException + * @throws InterruptedException + */ + private void preCloneSnapshot(final MasterProcedureEnv env) + throws IOException, InterruptedException { + if (!getTableName().isSystemTable()) { + // Check and update namespace quota + final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); + + SnapshotManifest manifest = SnapshotManifest.open( + env.getMasterConfiguration(), + mfs.getFileSystem(), + SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, mfs.getRootDir()), + snapshot); + + ProcedureSyncWait.getMasterQuotaManager(env) + .checkNamespaceTableAndRegionQuota(getTableName(), manifest.getRegionManifestsMap().size()); + } + + final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); + if (cpHost != null) { + user.doAs(new PrivilegedExceptionAction<Void>() { + @Override + public Void run() throws Exception { + cpHost.preCreateTableHandler(hTableDescriptor, null); + return null; + } + }); + } + } + + /** + * Action after cloning from snapshot. 
+ * @param env MasterProcedureEnv + * @throws IOException + * @throws InterruptedException + */ + private void postCloneSnapshot(final MasterProcedureEnv env) + throws IOException, InterruptedException { + final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); + if (cpHost != null) { + final HRegionInfo[] regions = (newRegions == null) ? null : + newRegions.toArray(new HRegionInfo[newRegions.size()]); + user.doAs(new PrivilegedExceptionAction<Void>() { + @Override + public Void run() throws Exception { + cpHost.postCreateTableHandler(hTableDescriptor, regions); + return null; + } + }); + } + } + + /** + * Create regions in file system. + * @param env MasterProcedureEnv + * @throws IOException + */ + private List<HRegionInfo> createFilesystemLayout( + final MasterProcedureEnv env, + final HTableDescriptor hTableDescriptor, + final List<HRegionInfo> newRegions) throws IOException { + return createFsLayout(env, hTableDescriptor, newRegions, new CreateHdfsRegions() { + @Override + public List<HRegionInfo> createHdfsRegions( + final MasterProcedureEnv env, + final Path tableRootDir, final TableName tableName, + final List<HRegionInfo> newRegions) throws IOException { + + final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); + final FileSystem fs = mfs.getFileSystem(); + final Path rootDir = mfs.getRootDir(); + final Configuration conf = env.getMasterConfiguration(); + final ForeignExceptionDispatcher monitorException = new ForeignExceptionDispatcher(); + + getMonitorStatus().setStatus("Clone snapshot - creating regions for table: " + tableName); + + try { + // 1. 
Execute the on-disk Clone + Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir); + SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshot); + RestoreSnapshotHelper restoreHelper = new RestoreSnapshotHelper( + conf, fs, manifest, hTableDescriptor, tableRootDir, monitorException, monitorStatus); + RestoreSnapshotHelper.RestoreMetaChanges metaChanges = restoreHelper.restoreHdfsRegions(); + + // Clone operation should not have stuff to restore or remove + Preconditions.checkArgument( + !metaChanges.hasRegionsToRestore(), "A clone should not have regions to restore"); + Preconditions.checkArgument( + !metaChanges.hasRegionsToRemove(), "A clone should not have regions to remove"); + + // At this point the clone is complete. Next step is enabling the table. + String msg = + "Clone snapshot="+ snapshot.getName() +" on table=" + tableName + " completed!"; + LOG.info(msg); + monitorStatus.setStatus(msg + " Waiting for table to be enabled..."); + + // 2. Let the next step add the regions to meta + return metaChanges.getRegionsToAdd(); + } catch (Exception e) { + String msg = "clone snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) + + " failed because " + e.getMessage(); + LOG.error(msg, e); + IOException rse = new RestoreSnapshotException(msg, e, snapshot); + + // these handlers aren't futures so we need to register the error here. + monitorException.receive(new ForeignException("Master CloneSnapshotProcedure", rse)); + throw rse; + } + } + }); + } + + /** + * Create region layout in file system. + * @param env MasterProcedureEnv + * @throws IOException + */ + private List<HRegionInfo> createFsLayout( + final MasterProcedureEnv env, + final HTableDescriptor hTableDescriptor, + List<HRegionInfo> newRegions, + final CreateHdfsRegions hdfsRegionHandler) throws IOException { + final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); + final Path tempdir = mfs.getTempDir(); + + // 1.
Create Table Descriptor + // using a copy of descriptor, table will be created enabling first + TableDescriptor underConstruction = new TableDescriptor(hTableDescriptor); + final Path tempTableDir = FSUtils.getTableDir(tempdir, hTableDescriptor.getTableName()); + ((FSTableDescriptors)(env.getMasterServices().getTableDescriptors())) + .createTableDescriptorForTableDirectory(tempTableDir, underConstruction, false); + + // 2. Create Regions + newRegions = hdfsRegionHandler.createHdfsRegions( + env, tempdir, hTableDescriptor.getTableName(), newRegions); + + // 3. Move Table temp directory to the hbase root location + CreateTableProcedure.moveTempDirectoryToHBaseRoot(env, hTableDescriptor, tempTableDir); + + return newRegions; + } + + /** + * Add regions to hbase:meta table. + * @param env MasterProcedureEnv + * @throws IOException + */ + private void addRegionsToMeta(final MasterProcedureEnv env) throws IOException { + newRegions = CreateTableProcedure.addTableToMeta(env, hTableDescriptor, newRegions); + + RestoreSnapshotHelper.RestoreMetaChanges metaChanges = + new RestoreSnapshotHelper.RestoreMetaChanges( + hTableDescriptor, parentsToChildrenPairMap); + metaChanges.updateMetaParentRegions(env.getMasterServices().getConnection(), newRegions); + } + + /** + * The procedure could be restarted from a different machine. If the variable is null, we need to + * retrieve it. 
+ * @return traceEnabled + */ + private Boolean isTraceEnabled() { + if (traceEnabled == null) { + traceEnabled = LOG.isTraceEnabled(); + } + return traceEnabled; + } +} http://git-wip-us.apache.org/repos/asf/hbase/blob/e1d5c3d2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java index 8ce8335..f262edb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java @@ -299,7 +299,8 @@ public class CreateTableProcedure throws IOException, InterruptedException { if (!getTableName().isSystemTable()) { ProcedureSyncWait.getMasterQuotaManager(env) - .checkNamespaceTableAndRegionQuota(getTableName(), newRegions.size()); + .checkNamespaceTableAndRegionQuota( + getTableName(), (newRegions != null ? newRegions.size() : 0)); } final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); @@ -373,6 +374,16 @@ public class CreateTableProcedure hTableDescriptor.getTableName(), newRegions); // 3. 
Move Table temp directory to the hbase root location + moveTempDirectoryToHBaseRoot(env, hTableDescriptor, tempTableDir); + + return newRegions; + } + + protected static void moveTempDirectoryToHBaseRoot( + final MasterProcedureEnv env, + final HTableDescriptor hTableDescriptor, + final Path tempTableDir) throws IOException { + final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); final Path tableDir = FSUtils.getTableDir(mfs.getRootDir(), hTableDescriptor.getTableName()); FileSystem fs = mfs.getFileSystem(); if (!fs.delete(tableDir, true) && fs.exists(tableDir)) { @@ -382,7 +393,6 @@ public class CreateTableProcedure throw new IOException("Unable to move table from temp=" + tempTableDir + " to hbase root=" + tableDir); } - return newRegions; } protected static List<HRegionInfo> addTableToMeta(final MasterProcedureEnv env, @@ -446,7 +456,7 @@ public class CreateTableProcedure /** * Add the specified set of regions to the hbase:meta table. */ - protected static void addRegionsToMeta(final MasterProcedureEnv env, + private static void addRegionsToMeta(final MasterProcedureEnv env, final HTableDescriptor hTableDescriptor, final List<HRegionInfo> regionInfos) throws IOException { MetaTableAccessor.addRegionsToMeta(env.getMasterServices().getConnection(), http://git-wip-us.apache.org/repos/asf/hbase/blob/e1d5c3d2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterDDLOperationHelper.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterDDLOperationHelper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterDDLOperationHelper.java index abfb776..19bd015 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterDDLOperationHelper.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterDDLOperationHelper.java @@ -165,4 +165,17 @@ public 
final class MasterDDLOperationHelper { } return done; } + + /** + * Get the region info list of a table from meta if it is not already known by the caller. + **/ + public static List<HRegionInfo> getRegionInfoList( + final MasterProcedureEnv env, + final TableName tableName, + List<HRegionInfo> regionInfoList) throws IOException { + if (regionInfoList == null) { + regionInfoList = ProcedureSyncWait.getRegionsFromMeta(env, tableName); + } + return regionInfoList; + } } http://git-wip-us.apache.org/repos/asf/hbase/blob/e1d5c3d2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java new file mode 100644 index 0000000..1dc8944 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java @@ -0,0 +1,526 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.master.procedure; + +import java.io.InputStream; +import java.io.OutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.DoNotRetryIOException; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.MetaTableAccessor; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.TableNotFoundException; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.master.MasterFileSystem; +import org.apache.hadoop.hbase.master.MetricsSnapshot; +import org.apache.hadoop.hbase.master.RegionStates; +import org.apache.hadoop.hbase.monitoring.MonitoredTask; +import org.apache.hadoop.hbase.monitoring.TaskMonitor; +import org.apache.hadoop.hbase.procedure2.StateMachineProcedure; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos; +import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos; +import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.RestoreSnapshotState; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.SnapshotManifest; +import 
org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.security.UserGroupInformation; + +@InterfaceAudience.Private +public class RestoreSnapshotProcedure + extends StateMachineProcedure<MasterProcedureEnv, RestoreSnapshotState> + implements TableProcedureInterface { + private static final Log LOG = LogFactory.getLog(RestoreSnapshotProcedure.class); + + private final AtomicBoolean aborted = new AtomicBoolean(false); + + private HTableDescriptor modifiedHTableDescriptor; + private List<HRegionInfo> regionsToRestore = null; + private List<HRegionInfo> regionsToRemove = null; + private List<HRegionInfo> regionsToAdd = null; + private Map<String, Pair<String, String>> parentsToChildrenPairMap = + new HashMap<String, Pair<String, String>>(); + + private UserGroupInformation user; + private SnapshotDescription snapshot; + + // Monitor + private MonitoredTask monitorStatus = null; + + private Boolean traceEnabled = null; + + /** + * Constructor (for failover) + */ + public RestoreSnapshotProcedure() { + } + + /** + * Constructor + * @param env MasterProcedureEnv + * @param hTableDescriptor the table to operate on + * @param snapshot snapshot to restore from + * @throws IOException + */ + public RestoreSnapshotProcedure( + final MasterProcedureEnv env, + final HTableDescriptor hTableDescriptor, + final SnapshotDescription snapshot) + throws IOException { + // This is the new schema we are going to write out as this modification. + this.modifiedHTableDescriptor = hTableDescriptor; + // Snapshot information + this.snapshot = snapshot; + // User and owner information + this.user = env.getRequestUser().getUGI(); + this.setOwner(this.user.getShortUserName()); + + // Monitor + getMonitorStatus(); + } + + /** + * Set up monitor status if it is not created. 
+ */ + private MonitoredTask getMonitorStatus() { + if (monitorStatus == null) { + monitorStatus = TaskMonitor.get().createStatus("Restoring snapshot '" + snapshot.getName() + + "' to table " + getTableName()); + } + return monitorStatus; + } + + /** + * Runs the step that belongs to the given state and selects the state to run next. + * An IOException thrown by a step is logged and recorded as the procedure failure. + * @param env MasterProcedureEnv + * @param state the state to execute + * @return Flow.NO_MORE_STATE after the RESTORE_SNAPSHOT_UPDATE_META step, + * Flow.HAS_MORE_STATE otherwise + * @throws InterruptedException + */ + @Override + protected Flow executeFromState(final MasterProcedureEnv env, final RestoreSnapshotState state) + throws InterruptedException { + if (isTraceEnabled()) { + LOG.trace(this + " execute state=" + state); + } + + // Make sure that the monitor status is set up + getMonitorStatus(); + + try { + switch (state) { + case RESTORE_SNAPSHOT_PRE_OPERATION: + // Verify if we can restore the table + prepareRestore(env); + setNextState(RestoreSnapshotState.RESTORE_SNAPSHOT_UPDATE_TABLE_DESCRIPTOR); + break; + case RESTORE_SNAPSHOT_UPDATE_TABLE_DESCRIPTOR: + updateTableDescriptor(env); + setNextState(RestoreSnapshotState.RESTORE_SNAPSHOT_WRITE_FS_LAYOUT); + break; + case RESTORE_SNAPSHOT_WRITE_FS_LAYOUT: + restoreSnapshot(env); + setNextState(RestoreSnapshotState.RESTORE_SNAPSHOT_UPDATE_META); + break; + case RESTORE_SNAPSHOT_UPDATE_META: + updateMETA(env); + return Flow.NO_MORE_STATE; + default: + throw new UnsupportedOperationException("unhandled state=" + state); + } + } catch (IOException e) { + // Log the snapshot name and the target table under separate labels. + LOG.error("Error trying to restore snapshot=" + snapshot.getName() + + " to table=" + getTableName() + " state=" + state, e); + setFailure("master-restore-snapshot", e); + } + return Flow.HAS_MORE_STATE; + } + + /** + * Rolls back the given state. Only RESTORE_SNAPSHOT_PRE_OPERATION is accepted, and it has + * nothing to undo. + * @param env MasterProcedureEnv + * @param state the state to roll back + * @throws IOException + */ + @Override + protected void rollbackState(final MasterProcedureEnv env, final RestoreSnapshotState state) + throws IOException { + if (isTraceEnabled()) { + LOG.trace(this + " rollback state=" + state); + } + + if (state == RestoreSnapshotState.RESTORE_SNAPSHOT_PRE_OPERATION) { + // nothing to rollback + return; + } + + // The restore snapshot doesn't have a rollback. The execution will succeed, at some point. 
+ throw new UnsupportedOperationException("unhandled state=" + state); + } + + @Override + protected RestoreSnapshotState getState(final int stateId) { + return RestoreSnapshotState.valueOf(stateId); + } + + @Override + protected int getStateId(final RestoreSnapshotState state) { + return state.getNumber(); + } + + @Override + protected RestoreSnapshotState getInitialState() { + return RestoreSnapshotState.RESTORE_SNAPSHOT_PRE_OPERATION; + } + + /** + * Moves to the given state, unless an abort was requested; in that case the procedure is + * marked as failed by abort instead of advancing. + * @param state the state to execute next + */ + @Override + protected void setNextState(final RestoreSnapshotState state) { + if (aborted.get()) { + // The failure source names this procedure, like the "master-restore-snapshot" failures. + setAbortFailure("restore-snapshot", "abort requested"); + } else { + super.setNextState(state); + } + } + + @Override + public TableName getTableName() { + return modifiedHTableDescriptor.getTableName(); + } + + @Override + public TableOperationType getTableOperationType() { + return TableOperationType.EDIT; // Restore is modifying a table + } + + /** + * Records the abort request; it is honored the next time setNextState() is called. + * @param env MasterProcedureEnv + * @return always true + */ + @Override + public boolean abort(final MasterProcedureEnv env) { + aborted.set(true); + return true; + } + + @Override + public void toStringClassDetails(StringBuilder sb) { + sb.append(getClass().getSimpleName()); + sb.append(" (table="); + sb.append(getTableName()); + sb.append(" snapshot="); + sb.append(snapshot); + sb.append(")"); + } + + @Override + public void serializeStateData(final OutputStream stream) throws IOException { + super.serializeStateData(stream); + + MasterProcedureProtos.RestoreSnapshotStateData.Builder restoreSnapshotMsg = + MasterProcedureProtos.RestoreSnapshotStateData.newBuilder() + .setUserInfo(MasterProcedureUtil.toProtoUserInfo(this.user)) + .setSnapshot(this.snapshot) + .setModifiedTableSchema(modifiedHTableDescriptor.convert()); + + if (regionsToRestore != null) { + for (HRegionInfo hri: regionsToRestore) { + restoreSnapshotMsg.addRegionInfoForRestore(HRegionInfo.convert(hri)); + } + } + if (regionsToRemove != null) { + for (HRegionInfo hri: regionsToRemove) { + restoreSnapshotMsg.addRegionInfoForRemove(HRegionInfo.convert(hri)); + } + } + if 
(regionsToAdd != null) { + for (HRegionInfo hri: regionsToAdd) { + restoreSnapshotMsg.addRegionInfoForAdd(HRegionInfo.convert(hri)); + } + } + if (!parentsToChildrenPairMap.isEmpty()) { + final Iterator<Map.Entry<String, Pair<String, String>>> it = + parentsToChildrenPairMap.entrySet().iterator(); + while (it.hasNext()) { + final Map.Entry<String, Pair<String, String>> entry = it.next(); + + MasterProcedureProtos.RestoreParentToChildRegionsPair.Builder parentToChildrenPair = + MasterProcedureProtos.RestoreParentToChildRegionsPair.newBuilder() + .setParentRegionName(entry.getKey()) + .setChild1RegionName(entry.getValue().getFirst()) + .setChild2RegionName(entry.getValue().getSecond()); + restoreSnapshotMsg.addParentToChildRegionsPairList (parentToChildrenPair); + } + } + restoreSnapshotMsg.build().writeDelimitedTo(stream); + } + + @Override + public void deserializeStateData(final InputStream stream) throws IOException { + super.deserializeStateData(stream); + + MasterProcedureProtos.RestoreSnapshotStateData restoreSnapshotMsg = + MasterProcedureProtos.RestoreSnapshotStateData.parseDelimitedFrom(stream); + user = MasterProcedureUtil.toUserInfo(restoreSnapshotMsg.getUserInfo()); + snapshot = restoreSnapshotMsg.getSnapshot(); + modifiedHTableDescriptor = + HTableDescriptor.convert(restoreSnapshotMsg.getModifiedTableSchema()); + + if (restoreSnapshotMsg.getRegionInfoForRestoreCount() == 0) { + regionsToRestore = null; + } else { + regionsToRestore = + new ArrayList<HRegionInfo>(restoreSnapshotMsg.getRegionInfoForRestoreCount()); + for (HBaseProtos.RegionInfo hri: restoreSnapshotMsg.getRegionInfoForRestoreList()) { + regionsToRestore.add(HRegionInfo.convert(hri)); + } + } + if (restoreSnapshotMsg.getRegionInfoForRemoveCount() == 0) { + regionsToRemove = null; + } else { + regionsToRemove = + new ArrayList<HRegionInfo>(restoreSnapshotMsg.getRegionInfoForRemoveCount()); + for (HBaseProtos.RegionInfo hri: restoreSnapshotMsg.getRegionInfoForRemoveList()) { + 
regionsToRemove.add(HRegionInfo.convert(hri)); + } + } + if (restoreSnapshotMsg.getRegionInfoForAddCount() == 0) { + regionsToAdd = null; + } else { + regionsToAdd = new ArrayList<HRegionInfo>(restoreSnapshotMsg.getRegionInfoForAddCount()); + for (HBaseProtos.RegionInfo hri: restoreSnapshotMsg.getRegionInfoForAddList()) { + regionsToAdd.add(HRegionInfo.convert(hri)); + } + } + if (restoreSnapshotMsg.getParentToChildRegionsPairListCount() > 0) { + for (MasterProcedureProtos.RestoreParentToChildRegionsPair parentToChildrenPair: + restoreSnapshotMsg.getParentToChildRegionsPairListList()) { + parentsToChildrenPairMap.put( + parentToChildrenPair.getParentRegionName(), + new Pair<String, String>( + parentToChildrenPair.getChild1RegionName(), + parentToChildrenPair.getChild2RegionName())); + } + } + } + + @Override + protected boolean acquireLock(final MasterProcedureEnv env) { + if (env.waitInitialized(this)) { + return false; + } + return env.getProcedureQueue().tryAcquireTableExclusiveLock(this, getTableName()); + } + + @Override + protected void releaseLock(final MasterProcedureEnv env) { + env.getProcedureQueue().releaseTableExclusiveLock(this, getTableName()); + } + + /** + * Action before any real action of restoring from snapshot. + * @param env MasterProcedureEnv + * @throws IOException + */ + private void prepareRestore(final MasterProcedureEnv env) throws IOException { + final TableName tableName = getTableName(); + // Checks whether the table exists + if (!MetaTableAccessor.tableExists(env.getMasterServices().getConnection(), tableName)) { + throw new TableNotFoundException(tableName); + } + + // Check whether table is disabled. 
+ env.getMasterServices().checkTableModifiable(tableName); + + // Check that we have at least 1 CF + if (modifiedHTableDescriptor.getColumnFamilies().length == 0) { + throw new DoNotRetryIOException("Table " + getTableName().toString() + + " should have at least one column family."); + } + + if (!getTableName().isSystemTable()) { + // Table already exist. Check and update the region quota for this table namespace. + final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); + SnapshotManifest manifest = SnapshotManifest.open( + env.getMasterConfiguration(), + mfs.getFileSystem(), + SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, mfs.getRootDir()), + snapshot); + int snapshotRegionCount = manifest.getRegionManifestsMap().size(); + int tableRegionCount = + ProcedureSyncWait.getMasterQuotaManager(env).getRegionCountOfTable(tableName); + + if (snapshotRegionCount > 0 && tableRegionCount != snapshotRegionCount) { + ProcedureSyncWait.getMasterQuotaManager(env).checkAndUpdateNamespaceRegionQuota( + tableName, snapshotRegionCount); + } + } + } + + /** + * Update descriptor + * @param env MasterProcedureEnv + * @throws IOException + **/ + private void updateTableDescriptor(final MasterProcedureEnv env) throws IOException { + env.getMasterServices().getTableDescriptors().add(modifiedHTableDescriptor); + } + + /** + * Execute the on-disk Restore + * @param env MasterProcedureEnv + * @throws IOException + **/ + private void restoreSnapshot(final MasterProcedureEnv env) throws IOException { + MasterFileSystem fileSystemManager = env.getMasterServices().getMasterFileSystem(); + FileSystem fs = fileSystemManager.getFileSystem(); + Path rootDir = fileSystemManager.getRootDir(); + final ForeignExceptionDispatcher monitorException = new ForeignExceptionDispatcher(); + + LOG.info("Starting restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot)); + try { + Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, 
rootDir); + SnapshotManifest manifest = SnapshotManifest.open( + env.getMasterServices().getConfiguration(), fs, snapshotDir, snapshot); + RestoreSnapshotHelper restoreHelper = new RestoreSnapshotHelper( + env.getMasterServices().getConfiguration(), + fs, + manifest, + modifiedHTableDescriptor, + rootDir, + monitorException, + getMonitorStatus()); + + RestoreSnapshotHelper.RestoreMetaChanges metaChanges = restoreHelper.restoreHdfsRegions(); + regionsToRestore = metaChanges.getRegionsToRestore(); + regionsToRemove = metaChanges.getRegionsToRemove(); + regionsToAdd = metaChanges.getRegionsToAdd(); + parentsToChildrenPairMap = metaChanges.getParentToChildrenPairMap(); + } catch (IOException e) { + String msg = "restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) + + " failed in on-disk restore. Try re-running the restore command."; + LOG.error(msg, e); + monitorException.receive( + new ForeignException(env.getMasterServices().getServerName().toString(), e)); + throw new IOException(msg, e); + } + } + + /** + * Apply changes to hbase:meta + * @param env MasterProcedureEnv + * @throws IOException + **/ + private void updateMETA(final MasterProcedureEnv env) throws IOException { + try { + Connection conn = env.getMasterServices().getConnection(); + + // 1. Forces all the RegionStates to be offline + // + // The AssignmentManager keeps all the region states around + // with no possibility to remove them, until the master is restarted. + // This means that a region marked as SPLIT before the restore will never be assigned again. + // To avoid having all states around all the regions are switched to the OFFLINE state, + // which is the same state that the regions will be after a delete table. + forceRegionsOffline(env, regionsToAdd); + forceRegionsOffline(env, regionsToRestore); + forceRegionsOffline(env, regionsToRemove); + + getMonitorStatus().setStatus("Preparing to restore each region"); + + // 2. Applies changes to hbase:meta + // (2.1). 
Removes the current set of regions from META + // + // By removing also the regions to restore (the ones present both in the snapshot + // and in the current state) we ensure that no extra fields are present in META + // e.g. with a simple add addRegionToMeta() the splitA and splitB attributes + // not overwritten/removed, so you end up with old informations + // that are not correct after the restore. + if (regionsToRemove != null) { + MetaTableAccessor.deleteRegions(conn, regionsToRemove); + } + + // (2.2). Add the new set of regions to META + // + // At this point the old regions are no longer present in META. + // and the set of regions present in the snapshot will be written to META. + // All the information in hbase:meta are coming from the .regioninfo of each region present + // in the snapshot folder. + if (regionsToAdd != null) { + MetaTableAccessor.addRegionsToMeta( + conn, + regionsToAdd, + modifiedHTableDescriptor.getRegionReplication()); + } + + if (regionsToRestore != null) { + MetaTableAccessor.overwriteRegions( + conn, + regionsToRestore, + modifiedHTableDescriptor.getRegionReplication()); + } + + RestoreSnapshotHelper.RestoreMetaChanges metaChanges = + new RestoreSnapshotHelper.RestoreMetaChanges( + modifiedHTableDescriptor, parentsToChildrenPairMap); + metaChanges.updateMetaParentRegions(conn, regionsToAdd); + + // At this point the restore is complete. + LOG.info("Restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) + + " on table=" + getTableName() + " completed!"); + } catch (IOException e) { + final ForeignExceptionDispatcher monitorException = new ForeignExceptionDispatcher(); + String msg = "restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) + + " failed in meta update. 
Try re-running the restore command."; + LOG.error(msg, e); + monitorException.receive( + new ForeignException(env.getMasterServices().getServerName().toString(), e)); + throw new IOException(msg, e); + } + + monitorStatus.markComplete("Restore snapshot '"+ snapshot.getName() +"'!"); + MetricsSnapshot metricsSnapshot = new MetricsSnapshot(); + metricsSnapshot.addSnapshotRestore( + monitorStatus.getCompletionTimestamp() - monitorStatus.getStartTime()); + } + + /** + * Make sure that region states of the region list is in OFFLINE state. + * @param env MasterProcedureEnv + * @param hris region info list + **/ + private void forceRegionsOffline(final MasterProcedureEnv env, final List<HRegionInfo> hris) { + RegionStates states = env.getMasterServices().getAssignmentManager().getRegionStates(); + if (hris != null) { + for (HRegionInfo hri: hris) { + states.regionOffline(hri); + } + } + } + + /** + * The procedure could be restarted from a different machine. If the variable is null, we need to + * retrieve it. + * @return traceEnabled + */ + private Boolean isTraceEnabled() { + if (traceEnabled == null) { + traceEnabled = LOG.isTraceEnabled(); + } + return traceEnabled; + } +} http://git-wip-us.apache.org/repos/asf/hbase/blob/e1d5c3d2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java deleted file mode 100644 index 2a6dca8..0000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java +++ /dev/null @@ -1,195 +0,0 @@ -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hbase.master.snapshot; - -import java.io.IOException; -import java.util.List; -import java.util.concurrent.CancellationException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; -import org.apache.hadoop.hbase.TableExistsException; -import org.apache.hadoop.hbase.errorhandling.ForeignException; -import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; -import org.apache.hadoop.hbase.master.MasterServices; -import org.apache.hadoop.hbase.master.MetricsSnapshot; -import org.apache.hadoop.hbase.master.SnapshotSentinel; -import org.apache.hadoop.hbase.master.handler.CreateTableHandler; -import org.apache.hadoop.hbase.monitoring.MonitoredTask; -import org.apache.hadoop.hbase.monitoring.TaskMonitor; -import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; -import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils; 
-import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException; -import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper; -import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; -import org.apache.hadoop.hbase.snapshot.SnapshotManifest; - -import com.google.common.base.Preconditions; - -/** - * Handler to Clone a snapshot. - * - * <p>Uses {@link RestoreSnapshotHelper} to create a new table with the same - * content of the specified snapshot. - */ -@InterfaceAudience.Private -public class CloneSnapshotHandler extends CreateTableHandler implements SnapshotSentinel { - private static final Log LOG = LogFactory.getLog(CloneSnapshotHandler.class); - - private final static String NAME = "Master CloneSnapshotHandler"; - - private final SnapshotDescription snapshot; - - private final ForeignExceptionDispatcher monitor; - private final MetricsSnapshot metricsSnapshot = new MetricsSnapshot(); - private final MonitoredTask status; - - private RestoreSnapshotHelper.RestoreMetaChanges metaChanges; - - private volatile boolean stopped = false; - - public CloneSnapshotHandler(final MasterServices masterServices, - final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) { - super(masterServices, masterServices.getMasterFileSystem(), hTableDescriptor, - masterServices.getConfiguration(), null, masterServices); - - // Snapshot information - this.snapshot = snapshot; - - // Monitor - this.monitor = new ForeignExceptionDispatcher(); - this.status = TaskMonitor.get().createStatus("Cloning snapshot '" + snapshot.getName() + - "' to table " + hTableDescriptor.getTableName()); - } - - @Override - public CloneSnapshotHandler prepare() throws NotAllMetaRegionsOnlineException, - TableExistsException, IOException { - return (CloneSnapshotHandler) super.prepare(); - } - - /** - * Create the on-disk regions, using the tableRootDir provided by the CreateTableHandler. 
- * The cloned table will be created in a temp directory, and then the CreateTableHandler - * will be responsible to add the regions returned by this method to hbase:meta and do the assignment. - */ - @Override - protected List<HRegionInfo> handleCreateHdfsRegions(final Path tableRootDir, - final TableName tableName) throws IOException { - status.setStatus("Creating regions for table: " + tableName); - FileSystem fs = fileSystemManager.getFileSystem(); - Path rootDir = fileSystemManager.getRootDir(); - - try { - // 1. Execute the on-disk Clone - Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir); - SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshot); - RestoreSnapshotHelper restoreHelper = new RestoreSnapshotHelper(conf, fs, - manifest, hTableDescriptor, tableRootDir, monitor, status); - metaChanges = restoreHelper.restoreHdfsRegions(); - - // Clone operation should not have stuff to restore or remove - Preconditions.checkArgument(!metaChanges.hasRegionsToRestore(), - "A clone should not have regions to restore"); - Preconditions.checkArgument(!metaChanges.hasRegionsToRemove(), - "A clone should not have regions to remove"); - - // At this point the clone is complete. Next step is enabling the table. - String msg = "Clone snapshot="+ snapshot.getName() +" on table=" + tableName + " completed!"; - LOG.info(msg); - status.setStatus(msg + " Waiting for table to be enabled..."); - - // 2. let the CreateTableHandler add the regions to meta - return metaChanges.getRegionsToAdd(); - } catch (Exception e) { - String msg = "clone snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) + - " failed because " + e.getMessage(); - LOG.error(msg, e); - IOException rse = new RestoreSnapshotException(msg, e, snapshot); - - // these handlers aren't futures so we need to register the error here. 
- this.monitor.receive(new ForeignException(NAME, rse)); - throw rse; - } - } - - @Override - protected void addRegionsToMeta(final List<HRegionInfo> regionInfos, - int regionReplication) - throws IOException { - super.addRegionsToMeta(regionInfos, regionReplication); - metaChanges.updateMetaParentRegions(this.server.getConnection(), regionInfos); - } - - @Override - protected void completed(final Throwable exception) { - this.stopped = true; - if (exception != null) { - status.abort("Snapshot '" + snapshot.getName() + "' clone failed because " + - exception.getMessage()); - } else { - status.markComplete("Snapshot '"+ snapshot.getName() +"' clone completed and table enabled!"); - } - metricsSnapshot.addSnapshotClone(status.getCompletionTimestamp() - status.getStartTime()); - super.completed(exception); - } - - @Override - public boolean isFinished() { - return this.stopped; - } - - @Override - public long getCompletionTimestamp() { - return this.status.getCompletionTimestamp(); - } - - @Override - public SnapshotDescription getSnapshot() { - return snapshot; - } - - @Override - public void cancel(String why) { - if (this.stopped) return; - this.stopped = true; - String msg = "Stopping clone snapshot=" + snapshot + " because: " + why; - LOG.info(msg); - status.abort(msg); - this.monitor.receive(new ForeignException(NAME, new CancellationException(why))); - } - - @Override - public ForeignException getExceptionIfFailed() { - return this.monitor.getException(); - } - - @Override - public void rethrowExceptionIfFailed() throws ForeignException { - monitor.rethrowException(); - } -} http://git-wip-us.apache.org/repos/asf/hbase/blob/e1d5c3d2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/RestoreSnapshotHandler.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/RestoreSnapshotHandler.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/RestoreSnapshotHandler.java deleted file mode 100644 index 56faf76..0000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/RestoreSnapshotHandler.java +++ /dev/null @@ -1,245 +0,0 @@ -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hbase.master.snapshot; - -import java.io.IOException; -import java.util.LinkedList; -import java.util.List; -import java.util.concurrent.CancellationException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.MetaTableAccessor; -import org.apache.hadoop.hbase.client.Connection; -import org.apache.hadoop.hbase.errorhandling.ForeignException; -import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; -import org.apache.hadoop.hbase.executor.EventType; -import org.apache.hadoop.hbase.master.AssignmentManager; -import org.apache.hadoop.hbase.master.MasterFileSystem; -import org.apache.hadoop.hbase.master.MasterServices; -import org.apache.hadoop.hbase.master.MetricsSnapshot; -import org.apache.hadoop.hbase.master.RegionStates; -import org.apache.hadoop.hbase.master.SnapshotSentinel; -import org.apache.hadoop.hbase.master.handler.TableEventHandler; -import org.apache.hadoop.hbase.monitoring.MonitoredTask; -import org.apache.hadoop.hbase.monitoring.TaskMonitor; -import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; -import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils; -import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException; -import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper; -import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; -import org.apache.hadoop.hbase.snapshot.SnapshotManifest; - -/** - * Handler to Restore a snapshot. - * - * <p>Uses {@link RestoreSnapshotHelper} to replace the table content with the - * data available in the snapshot. 
- */ -@InterfaceAudience.Private -public class RestoreSnapshotHandler extends TableEventHandler implements SnapshotSentinel { - private static final Log LOG = LogFactory.getLog(RestoreSnapshotHandler.class); - - private final HTableDescriptor hTableDescriptor; - private final SnapshotDescription snapshot; - - private final ForeignExceptionDispatcher monitor; - private final MetricsSnapshot metricsSnapshot = new MetricsSnapshot(); - private final MonitoredTask status; - - private volatile boolean stopped = false; - - public RestoreSnapshotHandler(final MasterServices masterServices, - final SnapshotDescription snapshot, final HTableDescriptor htd) throws IOException { - super(EventType.C_M_RESTORE_SNAPSHOT, htd.getTableName(), masterServices, masterServices); - - // Snapshot information - this.snapshot = snapshot; - - // Monitor - this.monitor = new ForeignExceptionDispatcher(); - - // Check table exists. - getTableDescriptor(); - - // This is the new schema we are going to write out as this modification. - this.hTableDescriptor = htd; - - this.status = TaskMonitor.get().createStatus( - "Restoring snapshot '" + snapshot.getName() + "' to table " - + hTableDescriptor.getTableName()); - } - - @Override - public RestoreSnapshotHandler prepare() throws IOException { - return (RestoreSnapshotHandler) super.prepare(); - } - - /** - * The restore table is executed in place. 
- * - The on-disk data will be restored - reference files are put in place without moving data - * - [if something fail here: you need to delete the table and re-run the restore] - * - hbase:meta will be updated - * - [if something fail here: you need to run hbck to fix hbase:meta entries] - * The passed in list gets changed in this method - */ - @Override - protected void handleTableOperation(List<HRegionInfo> hris) throws IOException { - MasterFileSystem fileSystemManager = masterServices.getMasterFileSystem(); - Connection conn = masterServices.getConnection(); - FileSystem fs = fileSystemManager.getFileSystem(); - Path rootDir = fileSystemManager.getRootDir(); - TableName tableName = hTableDescriptor.getTableName(); - - try { - // 1. Update descriptor - this.masterServices.getTableDescriptors().add(hTableDescriptor); - - // 2. Execute the on-disk Restore - LOG.debug("Starting restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot)); - Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir); - SnapshotManifest manifest = SnapshotManifest.open(masterServices.getConfiguration(), fs, - snapshotDir, snapshot); - RestoreSnapshotHelper restoreHelper = new RestoreSnapshotHelper( - masterServices.getConfiguration(), fs, manifest, - this.hTableDescriptor, rootDir, monitor, status); - RestoreSnapshotHelper.RestoreMetaChanges metaChanges = restoreHelper.restoreHdfsRegions(); - - // 3. Forces all the RegionStates to be offline - // - // The AssignmentManager keeps all the region states around - // with no possibility to remove them, until the master is restarted. - // This means that a region marked as SPLIT before the restore will never be assigned again. - // To avoid having all states around all the regions are switched to the OFFLINE state, - // which is the same state that the regions will be after a delete table. - forceRegionsOffline(metaChanges); - - // 4. 
Applies changes to hbase:meta - status.setStatus("Preparing to restore each region"); - - // 4.1 Removes the current set of regions from META - // - // By removing also the regions to restore (the ones present both in the snapshot - // and in the current state) we ensure that no extra fields are present in META - // e.g. with a simple add addRegionToMeta() the splitA and splitB attributes - // not overwritten/removed, so you end up with old informations - // that are not correct after the restore. - List<HRegionInfo> hrisToRemove = new LinkedList<HRegionInfo>(); - if (metaChanges.hasRegionsToRemove()) hrisToRemove.addAll(metaChanges.getRegionsToRemove()); - MetaTableAccessor.deleteRegions(conn, hrisToRemove); - - // 4.2 Add the new set of regions to META - // - // At this point the old regions are no longer present in META. - // and the set of regions present in the snapshot will be written to META. - // All the information in hbase:meta are coming from the .regioninfo of each region present - // in the snapshot folder. - hris.clear(); - if (metaChanges.hasRegionsToAdd()) hris.addAll(metaChanges.getRegionsToAdd()); - MetaTableAccessor.addRegionsToMeta(conn, hris, hTableDescriptor.getRegionReplication()); - if (metaChanges.hasRegionsToRestore()) { - MetaTableAccessor.overwriteRegions(conn, metaChanges.getRegionsToRestore(), - hTableDescriptor.getRegionReplication()); - } - metaChanges.updateMetaParentRegions(this.server.getConnection(), hris); - - // At this point the restore is complete. Next step is enabling the table. - LOG.info("Restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) + - " on table=" + tableName + " completed!"); - } catch (IOException e) { - String msg = "restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) - + " failed. 
Try re-running the restore command."; - LOG.error(msg, e); - monitor.receive(new ForeignException(masterServices.getServerName().toString(), e)); - throw new RestoreSnapshotException(msg, e); - } - } - - private void forceRegionsOffline(final RestoreSnapshotHelper.RestoreMetaChanges metaChanges) { - forceRegionsOffline(metaChanges.getRegionsToAdd()); - forceRegionsOffline(metaChanges.getRegionsToRestore()); - forceRegionsOffline(metaChanges.getRegionsToRemove()); - } - - private void forceRegionsOffline(final List<HRegionInfo> hris) { - AssignmentManager am = this.masterServices.getAssignmentManager(); - RegionStates states = am.getRegionStates(); - if (hris != null) { - for (HRegionInfo hri: hris) { - states.regionOffline(hri); - } - } - } - - @Override - protected void completed(final Throwable exception) { - this.stopped = true; - if (exception != null) { - status.abort("Restore snapshot '" + snapshot.getName() + "' failed because " + - exception.getMessage()); - } else { - status.markComplete("Restore snapshot '"+ snapshot.getName() +"'!"); - } - metricsSnapshot.addSnapshotRestore(status.getCompletionTimestamp() - status.getStartTime()); - super.completed(exception); - } - - @Override - public boolean isFinished() { - return this.stopped; - } - - @Override - public long getCompletionTimestamp() { - return this.status.getCompletionTimestamp(); - } - - @Override - public SnapshotDescription getSnapshot() { - return snapshot; - } - - @Override - public void cancel(String why) { - if (this.stopped) return; - this.stopped = true; - String msg = "Stopping restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) - + " because: " + why; - LOG.info(msg); - CancellationException ce = new CancellationException(why); - this.monitor.receive(new ForeignException(masterServices.getServerName().toString(), ce)); - } - - @Override - public ForeignException getExceptionIfFailed() { - return this.monitor.getException(); - } - - @Override - public void 
rethrowExceptionIfFailed() throws ForeignException { - monitor.rethrowException(); - } -}