gvprathyusha6 commented on code in PR #8248: URL: https://github.com/apache/hbase/pull/8248#discussion_r3334378965
########## hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MapreduceRestoreSnapshotHelper.java: ########## @@ -0,0 +1,831 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.mapreduce; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.stream.Collectors; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.client.TableDescriptor; +import 
org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.io.HFileLink; +import org.apache.hadoop.hbase.io.Reference; +import org.apache.hadoop.hbase.mob.MobUtils; +import org.apache.hadoop.hbase.monitoring.MonitoredTask; +import org.apache.hadoop.hbase.monitoring.TaskMonitor; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; +import org.apache.hadoop.hbase.regionserver.StoreContext; +import org.apache.hadoop.hbase.regionserver.StoreFileInfo; +import org.apache.hadoop.hbase.regionserver.StoreUtils; +import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker; +import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory; +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.SnapshotManifest; +import org.apache.hadoop.hbase.snapshot.SnapshotTTLExpiredException; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CommonFSUtils; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.MapreduceHFileArchiver; +import org.apache.hadoop.hbase.util.ModifyRegionUtils; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.io.IOUtils; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Helper to Restore/Clone a Snapshot + * <p> + * The helper assumes that a table is already created, and by calling restore() the content present + * in the snapshot will be restored as the new content of the table. 
+ * <p> + * Clone from Snapshot: If the target table is empty, the restore operation is just a "clone + * operation", where the only operations are: + * <ul> + * <li>for each region in the snapshot create a new region (note that the region will have a + * different name, since the encoding contains the table name) + * <li>for each file in the region create a new HFileLink to point to the original file. + * <li>restore the logs, if any + * </ul> + * <p> + * Restore from Snapshot: + * <ul> + * <li>for each region in the table verify which are available in the snapshot and which are not + * <ul> + * <li>if the region is not present in the snapshot, remove it. + * <li>if the region is present in the snapshot + * <ul> + * <li>for each file in the table region verify which are available in the snapshot + * <ul> + * <li>if the hfile is not present in the snapshot, remove it + * <li>if the hfile is present, keep it (nothing to do) + * </ul> + * <li>for each file in the snapshot region but not in the table + * <ul> + * <li>create a new HFileLink that point to the original file + * </ul> + * </ul> + * </ul> + * <li>for each region in the snapshot not present in the current table state + * <ul> + * <li>create a new region and for each file in the region create a new HFileLink (This is the same + * as the clone operation) + * </ul> + * <li>restore the logs, if any + * </ul> + */ +@InterfaceAudience.Private +public final class MapreduceRestoreSnapshotHelper { + + private static final Logger LOG = LoggerFactory.getLogger(MapreduceRestoreSnapshotHelper.class); + private final Map<byte[], byte[]> regionsMap = new TreeMap<>(Bytes.BYTES_COMPARATOR); + + private final Map<String, Pair<String, String>> parentsMap = new HashMap<>(); + + private final ForeignExceptionDispatcher monitor; + private final MonitoredTask status; + + private final SnapshotManifest snapshotManifest; + private final SnapshotDescription snapshotDesc; + private final TableName snapshotTable; + + private final 
TableDescriptor tableDesc; + private final Path rootDir; + private final Path tableDir; + + private final Configuration conf; + private final FileSystem fs; + private final boolean createBackRefs; + + public MapreduceRestoreSnapshotHelper(final Configuration conf, final FileSystem fs, + final SnapshotManifest manifest, final TableDescriptor tableDescriptor, final Path rootDir, + final ForeignExceptionDispatcher monitor, final MonitoredTask status, + final boolean createBackRefs) { + this.fs = fs; + this.conf = conf; + this.snapshotManifest = manifest; + this.snapshotDesc = manifest.getSnapshotDescription(); + this.snapshotTable = TableName.valueOf(snapshotDesc.getTable()); + this.tableDesc = tableDescriptor; + this.rootDir = rootDir; + this.tableDir = CommonFSUtils.getTableDir(rootDir, tableDesc.getTableName()); + this.monitor = monitor; + this.status = status; + this.createBackRefs = createBackRefs; + } + + /** + * Restore the on-disk table to a specified snapshot state. + * @return the set of regions touched by the restore operation + */ + public MapreduceRestoreSnapshotHelper.RestoreMetaChanges restoreHdfsRegions() throws IOException { Review Comment: We don't need these RestoreMetaChanges for MapReduce, right? Are we using them somewhere? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
