HBASE-18038 Rename StoreFile to HStoreFile and add a StoreFile interface for CP
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/ee0f148c Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/ee0f148c Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/ee0f148c Branch: refs/heads/master Commit: ee0f148c730e0ae1cb616406166487fba78a2298 Parents: 8bfa8aa Author: zhangduo <zhang...@apache.org> Authored: Tue Jun 6 16:35:19 2017 +0800 Committer: zhangduo <zhang...@apache.org> Committed: Tue Jun 6 20:36:38 2017 +0800 ---------------------------------------------------------------------- .../assignment/MergeTableRegionsProcedure.java | 9 +- .../assignment/SplitTableRegionProcedure.java | 13 +- .../master/balancer/StochasticLoadBalancer.java | 3 +- .../apache/hadoop/hbase/mob/CachedMobFile.java | 3 +- .../org/apache/hadoop/hbase/mob/MobFile.java | 3 +- .../org/apache/hadoop/hbase/mob/MobUtils.java | 5 +- .../compactions/PartitionedMobCompactor.java | 10 +- .../hbase/regionserver/CompactionTool.java | 38 +- .../regionserver/DateTieredStoreEngine.java | 2 +- .../hbase/regionserver/DefaultStoreEngine.java | 2 +- .../regionserver/DefaultStoreFileManager.java | 8 +- .../hadoop/hbase/regionserver/HMobStore.java | 2 +- .../hadoop/hbase/regionserver/HRegion.java | 6 +- .../hadoop/hbase/regionserver/HStore.java | 30 +- .../hadoop/hbase/regionserver/HStoreFile.java | 560 +++++++++ .../hadoop/hbase/regionserver/StoreFile.java | 736 ++---------- .../regionserver/StoreFileComparators.java | 96 ++ .../hbase/regionserver/StoreFileInfo.java | 1 - .../hbase/regionserver/StoreFileScanner.java | 20 +- .../hbase/regionserver/StoreFileWriter.java | 21 +- .../hadoop/hbase/regionserver/StoreScanner.java | 4 +- .../hadoop/hbase/regionserver/StoreUtils.java | 94 +- .../regionserver/StripeStoreFileManager.java | 20 +- .../compactions/CompactionRequest.java | 45 +- .../regionserver/compactions/Compactor.java | 4 +- .../compactions/DateTieredCompactionPolicy.java | 48 +- 
.../compactions/DateTieredCompactor.java | 4 +- .../compactions/RatioBasedCompactionPolicy.java | 7 +- .../compactions/SortedCompactionPolicy.java | 54 +- .../hadoop/hbase/snapshot/SnapshotManifest.java | 13 +- .../hadoop/hbase/util/BloomFilterFactory.java | 2 +- .../hbase/coprocessor/SimpleRegionObserver.java | 2 +- .../hbase/mapreduce/TestHFileOutputFormat2.java | 5 +- .../apache/hadoop/hbase/mob/TestMobFile.java | 6 +- .../hbase/mob/compactions/TestMobCompactor.java | 3 +- .../TestPartitionedMobCompactor.java | 40 +- .../hbase/namespace/TestNamespaceAuditor.java | 2 + .../AbstractTestDateTieredCompactionPolicy.java | 9 +- .../regionserver/DataBlockEncodingTool.java | 2 +- .../EncodedSeekPerformanceTest.java | 4 +- .../hbase/regionserver/MockStoreFile.java | 59 +- .../regionserver/TestCacheOnWriteInSchema.java | 2 +- .../TestCompactionArchiveConcurrentClose.java | 16 +- .../TestCompactionArchiveIOException.java | 11 +- .../regionserver/TestCompoundBloomFilter.java | 10 +- .../regionserver/TestEncryptionKeyRotation.java | 8 +- .../TestEncryptionRandomKeying.java | 8 +- .../hbase/regionserver/TestFSErrorsExposed.java | 8 +- .../hbase/regionserver/TestHMobStore.java | 29 +- .../hadoop/hbase/regionserver/TestHRegion.java | 1 - .../regionserver/TestHRegionReplayEvents.java | 14 +- .../regionserver/TestHRegionServerBulkLoad.java | 8 +- .../hbase/regionserver/TestHStoreFile.java | 1106 ++++++++++++++++++ .../regionserver/TestMobStoreCompaction.java | 15 +- .../hbase/regionserver/TestRegionReplicas.java | 2 +- .../regionserver/TestReversibleScanners.java | 14 +- .../TestSplitTransactionOnCluster.java | 3 +- .../hadoop/hbase/regionserver/TestStore.java | 5 +- .../hbase/regionserver/TestStoreFile.java | 1106 ------------------ .../regionserver/TestStripeStoreEngine.java | 2 + .../TestStripeStoreFileManager.java | 13 +- .../compactions/MockStoreFileGenerator.java | 9 +- .../compactions/PerfTestCompactionPolicies.java | 21 +- .../TestCompactedHFilesDischarger.java | 13 +- 
.../compactions/TestDateTieredCompactor.java | 3 +- .../compactions/TestStripeCompactionPolicy.java | 8 +- .../visibility/TestVisibilityLabels.java | 4 +- .../apache/hadoop/hbase/util/HFileTestUtil.java | 13 +- 68 files changed, 2331 insertions(+), 2116 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java index 2792ea2..83d5506 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java @@ -56,7 +56,7 @@ import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; -import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.regionserver.HStoreFile; import org.apache.hadoop.hbase.regionserver.StoreFileInfo; import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; @@ -605,11 +605,8 @@ public class MergeTableRegionsProcedure final CacheConfig cacheConf = new CacheConfig(conf, hcd); for (StoreFileInfo storeFileInfo: storeFiles) { // Create reference file(s) of the region in mergedDir - regionFs.mergeStoreFile( - mergedRegion, - family, - new StoreFile( - mfs.getFileSystem(), storeFileInfo, conf, cacheConf, 
hcd.getBloomFilterType()), + regionFs.mergeStoreFile(mergedRegion, family, new HStoreFile(mfs.getFileSystem(), + storeFileInfo, conf, cacheConf, hcd.getBloomFilterType(), true), mergedDir); } } http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java index a663608..2e2aa5d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hbase.master.assignment; +import com.google.common.annotations.VisibleForTesting; + import java.io.IOException; import java.io.InputStream; import java.io.InterruptedIOException; @@ -62,6 +64,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState; import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; import org.apache.hadoop.hbase.regionserver.HStore; +import org.apache.hadoop.hbase.regionserver.HStoreFile; import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.regionserver.StoreFileInfo; import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse; @@ -71,8 +74,6 @@ import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Threads; -import com.google.common.annotations.VisibleForTesting; - /** * The procedure to split a region in a table. * Takes lock on the parent region. 
@@ -525,11 +526,9 @@ public class SplitTableRegionProcedure if (storeFiles != null && storeFiles.size() > 0) { final CacheConfig cacheConf = new CacheConfig(conf, hcd); for (StoreFileInfo storeFileInfo: storeFiles) { - StoreFileSplitter sfs = new StoreFileSplitter( - regionFs, - family.getBytes(), - new StoreFile( - mfs.getFileSystem(), storeFileInfo, conf, cacheConf, hcd.getBloomFilterType())); + StoreFileSplitter sfs = + new StoreFileSplitter(regionFs, family.getBytes(), new HStoreFile(mfs.getFileSystem(), + storeFileInfo, conf, cacheConf, hcd.getBloomFilterType(), true)); futures.add(threadPool.submit(sfs)); } } http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 4b96bc6..fd4b091 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -28,7 +28,6 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Random; -import com.google.common.collect.Lists; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -51,6 +50,8 @@ import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.SwapRegi import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import com.google.common.collect.Lists; + /** * <p>This is a best effort load balancer. Given a Cost function F(C) => x It will * randomly try and mutate the cluster to Cprime. 
If F(Cprime) < F(C) then the http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/CachedMobFile.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/CachedMobFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/CachedMobFile.java index 90d1f2d..b7ebee3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/CachedMobFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/CachedMobFile.java @@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.regionserver.BloomType; +import org.apache.hadoop.hbase.regionserver.HStoreFile; import org.apache.hadoop.hbase.regionserver.StoreFile; /** @@ -46,7 +47,7 @@ public class CachedMobFile extends MobFile implements Comparable<CachedMobFile> CacheConfig cacheConf) throws IOException { // XXX: primaryReplica is only used for constructing the key of block cache so it is not a // critical problem if we pass the wrong value, so here we always pass true. Need to fix later. 
- StoreFile sf = new StoreFile(fs, path, conf, cacheConf, BloomType.NONE, true); + StoreFile sf = new HStoreFile(fs, path, conf, cacheConf, BloomType.NONE, true); return new CachedMobFile(sf); } http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java index 73355e8..7e3e36f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.regionserver.BloomType; +import org.apache.hadoop.hbase.regionserver.HStoreFile; import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.regionserver.StoreFileScanner; @@ -146,7 +147,7 @@ public class MobFile { throws IOException { // XXX: primaryReplica is only used for constructing the key of block cache so it is not a // critical problem if we pass the wrong value, so here we always pass true. Need to fix later. 
- StoreFile sf = new StoreFile(fs, path, conf, cacheConf, BloomType.NONE, true); + StoreFile sf = new HStoreFile(fs, path, conf, cacheConf, BloomType.NONE, true); return new MobFile(sf); } } http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobUtils.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobUtils.java index 06c5001..a869b7a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobUtils.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobUtils.java @@ -69,6 +69,7 @@ import org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.C import org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactor; import org.apache.hadoop.hbase.regionserver.BloomType; import org.apache.hadoop.hbase.regionserver.HStore; +import org.apache.hadoop.hbase.regionserver.HStoreFile; import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.regionserver.StoreFileWriter; import org.apache.hadoop.hbase.util.Bytes; @@ -334,7 +335,7 @@ public final class MobUtils { LOG.debug(fileName + " is an expired file"); } filesToClean - .add(new StoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true)); + .add(new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true)); } } catch (Exception e) { LOG.error("Cannot parse the fileName " + fileName, e); @@ -722,7 +723,7 @@ public final class MobUtils { CacheConfig cacheConfig, boolean primaryReplica) throws IOException { StoreFile storeFile = null; try { - storeFile = new StoreFile(fs, path, conf, cacheConfig, BloomType.NONE, primaryReplica); + storeFile = new HStoreFile(fs, path, conf, cacheConfig, BloomType.NONE, primaryReplica); storeFile.initReader(); } catch (IOException e) { LOG.error("Failed to open mob file[" 
+ path + "], keep it in temp directory.", e); http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/compactions/PartitionedMobCompactor.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/compactions/PartitionedMobCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/compactions/PartitionedMobCompactor.java index 05c7076..5fe0002 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/compactions/PartitionedMobCompactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/compactions/PartitionedMobCompactor.java @@ -36,7 +36,6 @@ import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; -import com.google.common.annotations.VisibleForTesting; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -74,6 +73,7 @@ import org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.C import org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionPartitionId; import org.apache.hadoop.hbase.regionserver.BloomType; import org.apache.hadoop.hbase.regionserver.HStore; +import org.apache.hadoop.hbase.regionserver.HStoreFile; import org.apache.hadoop.hbase.regionserver.ScanInfo; import org.apache.hadoop.hbase.regionserver.ScanType; import org.apache.hadoop.hbase.regionserver.ScannerContext; @@ -87,6 +87,8 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.Pair; +import com.google.common.annotations.VisibleForTesting; + /** * An implementation of {@link MobCompactor} that compacts the mob files in partitions. 
*/ @@ -335,7 +337,7 @@ public class PartitionedMobCompactor extends MobCompactor { for (CompactionDelPartition delPartition : request.getDelPartitions()) { for (Path newDelPath : delPartition.listDelFiles()) { StoreFile sf = - new StoreFile(fs, newDelPath, conf, compactionCacheConfig, BloomType.NONE, true); + new HStoreFile(fs, newDelPath, conf, compactionCacheConfig, BloomType.NONE, true); // pre-create reader of a del file to avoid race condition when opening the reader in each // partition. sf.initReader(); @@ -551,7 +553,7 @@ public class PartitionedMobCompactor extends MobCompactor { // add the selected mob files and del files into filesToCompact List<StoreFile> filesToCompact = new ArrayList<>(); for (int i = offset; i < batch + offset; i++) { - StoreFile sf = new StoreFile(fs, files.get(i).getPath(), conf, compactionCacheConfig, + StoreFile sf = new HStoreFile(fs, files.get(i).getPath(), conf, compactionCacheConfig, BloomType.NONE, true); filesToCompact.add(sf); } @@ -733,7 +735,7 @@ public class PartitionedMobCompactor extends MobCompactor { continue; } for (int i = offset; i < batch + offset; i++) { - batchedDelFiles.add(new StoreFile(fs, delFilePaths.get(i), conf, compactionCacheConfig, + batchedDelFiles.add(new HStoreFile(fs, delFilePaths.get(i), conf, compactionCacheConfig, BloomType.NONE, true)); } // compact the del files in a batch. 
http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java index bea3e7f..e1d2ea1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java @@ -27,41 +27,39 @@ import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.util.LineReader; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.input.FileSplit; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.HDFSBlocksDistribution; -import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.HRegionInfo; -import 
org.apache.hadoop.hbase.regionserver.HRegion; -import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; -import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; -import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.mapreduce.JobUtil; import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; +import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; +import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.FSTableDescriptors; import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.input.FileSplit; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; +import org.apache.hadoop.util.LineReader; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; /* * The CompactionTool allows to execute a compaction specifying a: http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java index 2d86e39..7fe4c22 100644 --- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java @@ -58,7 +58,7 @@ public class DateTieredStoreEngine extends StoreEngine<DefaultStoreFlusher, throws IOException { this.compactionPolicy = new DateTieredCompactionPolicy(conf, store); this.storeFileManager = - new DefaultStoreFileManager(kvComparator, StoreFile.Comparators.SEQ_ID_MAX_TIMESTAMP, conf, + new DefaultStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID_MAX_TIMESTAMP, conf, compactionPolicy.getConf()); this.storeFlusher = new DefaultStoreFlusher(conf, store); this.compactor = new DateTieredCompactor(conf, store); http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java index 8e94e2f..5c7b817 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java @@ -69,7 +69,7 @@ public class DefaultStoreEngine extends StoreEngine< createCompactionPolicy(conf, store); createStoreFlusher(conf, store); storeFileManager = - new DefaultStoreFileManager(kvComparator, StoreFile.Comparators.SEQ_ID, conf, + new DefaultStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID, conf, compactionPolicy.getConf()); } http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java ---------------------------------------------------------------------- diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java index da25df5..f4f9aa6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java @@ -25,6 +25,7 @@ import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.List; +import java.util.Optional; import com.google.common.collect.ImmutableCollection; import com.google.common.collect.ImmutableList; @@ -172,10 +173,13 @@ class DefaultStoreFileManager implements StoreFileManager { @Override public final byte[] getSplitPoint() throws IOException { - if (this.storefiles.isEmpty()) { + List<StoreFile> storefiles = this.storefiles; + if (storefiles.isEmpty()) { return null; } - return StoreUtils.getLargestFile(this.storefiles).getFileSplitPoint(this.kvComparator); + Optional<StoreFile> largestFile = StoreUtils.getLargestFile(storefiles); + return largestFile.isPresent() + ? 
StoreUtils.getFileSplitPoint(largestFile.get(), kvComparator).orElse(null) : null; } @Override http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java index 73c8a1f..c240df3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java @@ -293,7 +293,7 @@ public class HMobStore extends HStore { private void validateMobFile(Path path) throws IOException { StoreFile storeFile = null; try { - storeFile = new StoreFile(region.getFilesystem(), path, conf, this.mobCacheConfig, + storeFile = new HStoreFile(region.getFilesystem(), path, conf, this.mobCacheConfig, BloomType.NONE, isPrimaryReplicaStore()); storeFile.initReader(); } catch (IOException e) { http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index a620a25..7f9c766 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -1456,7 +1456,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi * time-sensitive thread. * * @return Vector of all the storage files that the HRegion's component - * HStores make use of. It's a list of all HStoreFile objects. Returns empty + * HStores make use of. 
It's a list of all StoreFile objects. Returns empty * vector if already closed and null if judged that it should not close. * * @throws IOException e @@ -1497,7 +1497,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi * * @param abort true if server is aborting (only during testing) * @return Vector of all the storage files that the HRegion's component - * HStores make use of. It's a list of HStoreFile objects. Can be null if + * HStores make use of. It's a list of StoreFile objects. Can be null if * we are not to close at this time or we are already closed. * * @throws IOException e @@ -4204,7 +4204,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi Set<StoreFile> fakeStoreFiles = new HashSet<>(files.size()); for (Path file: files) { fakeStoreFiles.add( - new StoreFile(getRegionFileSystem().getFileSystem(), file, this.conf, null, null, true)); + new HStoreFile(getRegionFileSystem().getFileSystem(), file, this.conf, null, null, true)); } getRegionFileSystem().removeStoreFiles(fakeFamilyName, fakeStoreFiles); } else { http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index 051471e..17e255a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -18,13 +18,6 @@ */ package org.apache.hadoop.hbase.regionserver; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableCollection; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import 
com.google.common.collect.Sets; - import java.io.IOException; import java.io.InterruptedIOException; import java.net.InetSocketAddress; @@ -53,7 +46,15 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.CompoundConfiguration; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.MemoryCompactionPolicy; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.backup.FailedArchiveException; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.Scan; @@ -90,6 +91,13 @@ import org.apache.hadoop.hbase.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableCollection; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + /** * A Store holds a column family in a Region. Its a memstore and a set of zero * or more StoreFiles, which stretch backwards over time. 
@@ -455,12 +463,12 @@ public class HStore implements Store { */ @Override public long getMaxSequenceId() { - return StoreFile.getMaxSequenceIdInList(this.getStorefiles()); + return StoreUtils.getMaxSequenceIdInList(this.getStorefiles()); } @Override public long getMaxMemstoreTS() { - return StoreFile.getMaxMemstoreTSInList(this.getStorefiles()); + return StoreUtils.getMaxMemstoreTSInList(this.getStorefiles()); } /** @@ -655,7 +663,7 @@ public class HStore implements Store { private StoreFile createStoreFileAndReader(final StoreFileInfo info) throws IOException { info.setRegionCoprocessorHost(this.region.getCoprocessorHost()); - StoreFile storeFile = new StoreFile(this.getFileSystem(), info, this.conf, this.cacheConf, + StoreFile storeFile = new HStoreFile(this.getFileSystem(), info, this.conf, this.cacheConf, this.family.getBloomFilterType(), isPrimaryReplicaStore()); storeFile.initReader(); return storeFile; http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java new file mode 100644 index 0000000..2df15f9 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java @@ -0,0 +1,560 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; +import java.util.Collections; +import java.util.Comparator; +import java.util.Map; +import java.util.OptionalLong; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.HDFSBlocksDistribution; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.io.TimeRange; +import org.apache.hadoop.hbase.io.hfile.BlockType; +import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.util.BloomFilterFactory; +import org.apache.hadoop.hbase.util.Bytes; + +import com.google.common.annotations.VisibleForTesting; + +/** + * A Store data file. Stores usually have one or more of these files. They + * are produced by flushing the memstore to disk. To + * create, instantiate a writer using {@link StoreFileWriter.Builder} + * and append data. Be sure to add any metadata before calling close on the + * Writer (Use the appendMetadata convenience methods). On close, a StoreFile + * is sitting in the Filesystem. To refer to it, create a StoreFile instance + * passing filesystem and path. 
To read, call {@link #initReader()} + * <p>StoreFiles may also reference store files in another Store. + * + * The reason for this weird pattern where you use a different instance for the + * writer and a reader is that we write once but read a lot more. + */ +@InterfaceAudience.Private +public class HStoreFile implements StoreFile { + + private static final Log LOG = LogFactory.getLog(HStoreFile.class.getName()); + + private static final boolean DEFAULT_STORE_FILE_READER_NO_READAHEAD = false; + + private final StoreFileInfo fileInfo; + private final FileSystem fs; + + // Block cache configuration and reference. + private final CacheConfig cacheConf; + + // Counter that is incremented every time a scanner is created on the + // store file. It is decremented when the scan on the store file is + // done. + private final AtomicInteger refCount = new AtomicInteger(0); + + private final boolean noReadahead; + + private final boolean primaryReplica; + + // Indicates if the file got compacted + private volatile boolean compactedAway = false; + + // Keys for metadata stored in backing HFile. + // Set when we obtain a Reader. + private long sequenceid = -1; + + // max of the MemstoreTS in the KV's in this store + // Set when we obtain a Reader. + private long maxMemstoreTS = -1; + + // firstKey, lastkey and cellComparator will be set when openReader. + private Cell firstKey; + + private Cell lastKey; + + private Comparator<Cell> comparator; + + @Override + public CacheConfig getCacheConf() { + return cacheConf; + } + + @Override + public Cell getFirstKey() { + return firstKey; + } + + @Override + public Cell getLastKey() { + return lastKey; + } + + @Override + public Comparator<Cell> getComparator() { + return comparator; + } + + @Override + public long getMaxMemstoreTS() { + return maxMemstoreTS; + } + + // If true, this file was product of a major compaction. Its then set + // whenever you get a Reader. 
+ private AtomicBoolean majorCompaction = null; + + // If true, this file should not be included in minor compactions. + // It's set whenever you get a Reader. + private boolean excludeFromMinorCompaction = false; + + /** + * Map of the metadata entries in the corresponding HFile. Populated when Reader is opened + * after which it is not modified again. + */ + private Map<byte[], byte[]> metadataMap; + + // StoreFile.Reader + private volatile StoreFileReader reader; + + /** + * Bloom filter type specified in column family configuration. Does not + * necessarily correspond to the Bloom filter type present in the HFile. + */ + private final BloomType cfBloomType; + + /** + * Constructor, loads a reader and it's indices, etc. May allocate a substantial amount of ram + * depending on the underlying files (10-20MB?). + * @param fs The current file system to use. + * @param p The path of the file. + * @param conf The current configuration. + * @param cacheConf The cache configuration and block cache reference. + * @param cfBloomType The bloom type to use for this store file as specified by column family + * configuration. This may or may not be the same as the Bloom filter type actually + * present in the HFile, because column family configuration might change. If this is + * {@link BloomType#NONE}, the existing Bloom filter is ignored. + * @deprecated Now we will specific whether the StoreFile is for primary replica when + * constructing, so please use {@link #HStoreFile(FileSystem, Path, Configuration, + * CacheConfig, BloomType, boolean)} directly. + */ + @Deprecated + public HStoreFile(final FileSystem fs, final Path p, final Configuration conf, + final CacheConfig cacheConf, final BloomType cfBloomType) throws IOException { + this(fs, new StoreFileInfo(conf, fs, p), conf, cacheConf, cfBloomType); + } + + /** + * Constructor, loads a reader and it's indices, etc. May allocate a substantial amount of ram + * depending on the underlying files (10-20MB?). 
+ * @param fs The current file system to use. + * @param p The path of the file. + * @param conf The current configuration. + * @param cacheConf The cache configuration and block cache reference. + * @param cfBloomType The bloom type to use for this store file as specified by column family + * configuration. This may or may not be the same as the Bloom filter type actually + * present in the HFile, because column family configuration might change. If this is + * {@link BloomType#NONE}, the existing Bloom filter is ignored. + * @param primaryReplica true if this is a store file for primary replica, otherwise false. + * @throws IOException + */ + public HStoreFile(FileSystem fs, Path p, Configuration conf, CacheConfig cacheConf, + BloomType cfBloomType, boolean primaryReplica) throws IOException { + this(fs, new StoreFileInfo(conf, fs, p), conf, cacheConf, cfBloomType, primaryReplica); + } + + /** + * Constructor, loads a reader and it's indices, etc. May allocate a substantial amount of ram + * depending on the underlying files (10-20MB?). + * @param fs The current file system to use. + * @param fileInfo The store file information. + * @param conf The current configuration. + * @param cacheConf The cache configuration and block cache reference. + * @param cfBloomType The bloom type to use for this store file as specified by column family + * configuration. This may or may not be the same as the Bloom filter type actually + * present in the HFile, because column family configuration might change. If this is + * {@link BloomType#NONE}, the existing Bloom filter is ignored. + * @deprecated Now we will specific whether the StoreFile is for primary replica when + * constructing, so please use {@link #HStoreFile(FileSystem, StoreFileInfo, + * Configuration, CacheConfig, BloomType, boolean)} directly. 
+ */ + @Deprecated + public HStoreFile(final FileSystem fs, final StoreFileInfo fileInfo, final Configuration conf, + final CacheConfig cacheConf, final BloomType cfBloomType) throws IOException { + this(fs, fileInfo, conf, cacheConf, cfBloomType, true); + } + + /** + * Constructor, loads a reader and it's indices, etc. May allocate a substantial amount of ram + * depending on the underlying files (10-20MB?). + * @param fs fs The current file system to use. + * @param fileInfo The store file information. + * @param conf The current configuration. + * @param cacheConf The cache configuration and block cache reference. + * @param cfBloomType The bloom type to use for this store file as specified by column + * family configuration. This may or may not be the same as the Bloom filter type + * actually present in the HFile, because column family configuration might change. If + * this is {@link BloomType#NONE}, the existing Bloom filter is ignored. + * @param primaryReplica true if this is a store file for primary replica, otherwise false. 
+ */ + public HStoreFile(FileSystem fs, StoreFileInfo fileInfo, Configuration conf, CacheConfig cacheConf, + BloomType cfBloomType, boolean primaryReplica) { + this.fs = fs; + this.fileInfo = fileInfo; + this.cacheConf = cacheConf; + this.noReadahead = + conf.getBoolean(STORE_FILE_READER_NO_READAHEAD, DEFAULT_STORE_FILE_READER_NO_READAHEAD); + if (BloomFilterFactory.isGeneralBloomEnabled(conf)) { + this.cfBloomType = cfBloomType; + } else { + LOG.info("Ignoring bloom filter check for file " + this.getPath() + ": " + "cfBloomType=" + + cfBloomType + " (disabled in config)"); + this.cfBloomType = BloomType.NONE; + } + this.primaryReplica = primaryReplica; + } + + @Override + public StoreFileInfo getFileInfo() { + return this.fileInfo; + } + + @Override + public Path getPath() { + return this.fileInfo.getPath(); + } + + @Override + public Path getQualifiedPath() { + return this.fileInfo.getPath().makeQualified(fs.getUri(), fs.getWorkingDirectory()); + } + + @Override + public boolean isReference() { + return this.fileInfo.isReference(); + } + + @Override + public boolean isHFile() { + return StoreFileInfo.isHFile(this.fileInfo.getPath()); + } + + @Override + public boolean isMajorCompactionResult() { + if (this.majorCompaction == null) { + throw new NullPointerException("This has not been set yet"); + } + return this.majorCompaction.get(); + } + + @Override + public boolean excludeFromMinorCompaction() { + return this.excludeFromMinorCompaction; + } + + @Override + public long getMaxSequenceId() { + return this.sequenceid; + } + + @Override + public long getModificationTimeStamp() throws IOException { + return fileInfo.getModificationTime(); + } + + @Override + public byte[] getMetadataValue(byte[] key) { + return metadataMap.get(key); + } + + @Override + public boolean isBulkLoadResult() { + boolean bulkLoadedHFile = false; + String fileName = this.getPath().getName(); + int startPos = fileName.indexOf("SeqId_"); + if (startPos != -1) { + bulkLoadedHFile = true; + } 
+ return bulkLoadedHFile || (metadataMap != null && metadataMap.containsKey(BULKLOAD_TIME_KEY)); + } + + @Override + public boolean isCompactedAway() { + return compactedAway; + } + + @VisibleForTesting + public int getRefCount() { + return refCount.get(); + } + + @Override + public boolean isReferencedInReads() { + int rc = refCount.get(); + assert rc >= 0; // we should not go negative. + return rc > 0; + } + + @Override + public OptionalLong getBulkLoadTimestamp() { + byte[] bulkLoadTimestamp = metadataMap.get(BULKLOAD_TIME_KEY); + return bulkLoadTimestamp == null ? OptionalLong.empty() + : OptionalLong.of(Bytes.toLong(bulkLoadTimestamp)); + } + + @Override + public HDFSBlocksDistribution getHDFSBlockDistribution() { + return this.fileInfo.getHDFSBlockDistribution(); + } + + /** + * Opens reader on this store file. Called by Constructor. + * @throws IOException + * @see #closeReader(boolean) + */ + private void open() throws IOException { + if (this.reader != null) { + throw new IllegalAccessError("Already open"); + } + + // Open the StoreFile.Reader + this.reader = fileInfo.open(this.fs, this.cacheConf, false, noReadahead ? 0L : -1L, + primaryReplica, refCount, true); + + // Load up indices and fileinfo. This also loads Bloom filter type. + metadataMap = Collections.unmodifiableMap(this.reader.loadFileInfo()); + + // Read in our metadata. + byte [] b = metadataMap.get(MAX_SEQ_ID_KEY); + if (b != null) { + // By convention, if halfhfile, top half has a sequence number > bottom + // half. Thats why we add one in below. Its done for case the two halves + // are ever merged back together --rare. Without it, on open of store, + // since store files are distinguished by sequence id, the one half would + // subsume the other. 
+ this.sequenceid = Bytes.toLong(b); + if (fileInfo.isTopReference()) { + this.sequenceid += 1; + } + } + + if (isBulkLoadResult()){ + // generate the sequenceId from the fileName + // fileName is of the form <randomName>_SeqId_<id-when-loaded>_ + String fileName = this.getPath().getName(); + // Use lastIndexOf() to get the last, most recent bulk load seqId. + int startPos = fileName.lastIndexOf("SeqId_"); + if (startPos != -1) { + this.sequenceid = Long.parseLong(fileName.substring(startPos + 6, + fileName.indexOf('_', startPos + 6))); + // Handle reference files as done above. + if (fileInfo.isTopReference()) { + this.sequenceid += 1; + } + } + // SKIP_RESET_SEQ_ID only works in bulk loaded file. + // In mob compaction, the hfile where the cells contain the path of a new mob file is bulk + // loaded to hbase, these cells have the same seqIds with the old ones. We do not want + // to reset new seqIds for them since this might make a mess of the visibility of cells that + // have the same row key but different seqIds. + boolean skipResetSeqId = isSkipResetSeqId(metadataMap.get(SKIP_RESET_SEQ_ID)); + if (skipResetSeqId) { + // increase the seqId when it is a bulk loaded file from mob compaction. + this.sequenceid += 1; + } + this.reader.setSkipResetSeqId(skipResetSeqId); + this.reader.setBulkLoaded(true); + } + this.reader.setSequenceID(this.sequenceid); + + b = metadataMap.get(HFile.Writer.MAX_MEMSTORE_TS_KEY); + if (b != null) { + this.maxMemstoreTS = Bytes.toLong(b); + } + + b = metadataMap.get(MAJOR_COMPACTION_KEY); + if (b != null) { + boolean mc = Bytes.toBoolean(b); + if (this.majorCompaction == null) { + this.majorCompaction = new AtomicBoolean(mc); + } else { + this.majorCompaction.set(mc); + } + } else { + // Presume it is not major compacted if it doesn't explicity say so + // HFileOutputFormat explicitly sets the major compacted key. 
+ this.majorCompaction = new AtomicBoolean(false); + } + + b = metadataMap.get(EXCLUDE_FROM_MINOR_COMPACTION_KEY); + this.excludeFromMinorCompaction = (b != null && Bytes.toBoolean(b)); + + BloomType hfileBloomType = reader.getBloomFilterType(); + if (cfBloomType != BloomType.NONE) { + reader.loadBloomfilter(BlockType.GENERAL_BLOOM_META); + if (hfileBloomType != cfBloomType) { + LOG.info("HFile Bloom filter type for " + + reader.getHFileReader().getName() + ": " + hfileBloomType + + ", but " + cfBloomType + " specified in column family " + + "configuration"); + } + } else if (hfileBloomType != BloomType.NONE) { + LOG.info("Bloom filter turned off by CF config for " + + reader.getHFileReader().getName()); + } + + // load delete family bloom filter + reader.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META); + + try { + this.reader.timeRange = TimeRangeTracker.getTimeRange(metadataMap.get(TIMERANGE_KEY)); + } catch (IllegalArgumentException e) { + LOG.error("Error reading timestamp range data from meta -- " + + "proceeding without", e); + this.reader.timeRange = null; + } + // initialize so we can reuse them after reader closed. + firstKey = reader.getFirstKey(); + lastKey = reader.getLastKey(); + comparator = reader.getComparator(); + } + + @Override + public void initReader() throws IOException { + if (reader == null) { + try { + open(); + } catch (Exception e) { + try { + boolean evictOnClose = cacheConf != null ? 
cacheConf.shouldEvictOnClose() : true; + this.closeReader(evictOnClose); + } catch (IOException ee) { + LOG.warn("failed to close reader", ee); + } + throw e; + } + } + } + + private StoreFileReader createStreamReader(boolean canUseDropBehind) throws IOException { + initReader(); + StoreFileReader reader = fileInfo.open(this.fs, this.cacheConf, canUseDropBehind, -1L, + primaryReplica, refCount, false); + reader.copyFields(this.reader); + return reader; + } + + @Override + public StoreFileScanner getPreadScanner(boolean cacheBlocks, long readPt, long scannerOrder, + boolean canOptimizeForNonNullColumn) { + return getReader().getStoreFileScanner(cacheBlocks, true, false, readPt, scannerOrder, + canOptimizeForNonNullColumn); + } + + @Override + public StoreFileScanner getStreamScanner(boolean canUseDropBehind, boolean cacheBlocks, + boolean isCompaction, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) + throws IOException { + return createStreamReader(canUseDropBehind).getStoreFileScanner(cacheBlocks, false, + isCompaction, readPt, scannerOrder, canOptimizeForNonNullColumn); + } + + @Override + public StoreFileReader getReader() { + return this.reader; + } + + @Override + public synchronized void closeReader(boolean evictOnClose) + throws IOException { + if (this.reader != null) { + this.reader.close(evictOnClose); + this.reader = null; + } + } + + @Override + public void markCompactedAway() { + this.compactedAway = true; + } + + @Override + public void deleteReader() throws IOException { + boolean evictOnClose = + cacheConf != null? 
cacheConf.shouldEvictOnClose(): true; + closeReader(evictOnClose); + this.fs.delete(getPath(), true); + } + + @Override + public String toString() { + return this.fileInfo.toString(); + } + + @Override + public String toStringDetailed() { + StringBuilder sb = new StringBuilder(); + sb.append(this.getPath().toString()); + sb.append(", isReference=").append(isReference()); + sb.append(", isBulkLoadResult=").append(isBulkLoadResult()); + if (isBulkLoadResult()) { + sb.append(", bulkLoadTS="); + OptionalLong bulkLoadTS = getBulkLoadTimestamp(); + if (bulkLoadTS.isPresent()) { + sb.append(bulkLoadTS.getAsLong()); + } else { + sb.append("NotPresent"); + } + } else { + sb.append(", seqid=").append(getMaxSequenceId()); + } + sb.append(", majorCompaction=").append(isMajorCompactionResult()); + + return sb.toString(); + } + + /** + * Gets whether to skip resetting the sequence id for cells. + * @param skipResetSeqId The byte array of boolean. + * @return Whether to skip resetting the sequence id. + */ + private boolean isSkipResetSeqId(byte[] skipResetSeqId) { + if (skipResetSeqId != null && skipResetSeqId.length == 1) { + return Bytes.toBoolean(skipResetSeqId); + } + return false; + } + + @Override + public OptionalLong getMinimumTimestamp() { + TimeRange tr = getReader().timeRange; + return tr != null ? OptionalLong.of(tr.getMin()) : OptionalLong.empty(); + } + + @Override + public OptionalLong getMaximumTimestamp() { + TimeRange tr = getReader().timeRange; + return tr != null ? 
OptionalLong.of(tr.getMax()) : OptionalLong.empty(); + } +} http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java index 91ff97a..a5efbed 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java @@ -1,5 +1,4 @@ /** - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,789 +17,200 @@ */ package org.apache.hadoop.hbase.regionserver; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Function; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Ordering; - import java.io.IOException; -import java.util.Collection; -import java.util.Collections; import java.util.Comparator; -import java.util.Map; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; +import java.util.OptionalLong; + import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.HDFSBlocksDistribution; import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.io.hfile.BlockType; +import 
org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.hbase.io.hfile.CacheConfig; -import org.apache.hadoop.hbase.io.hfile.HFile; -import org.apache.hadoop.hbase.util.BloomFilterFactory; import org.apache.hadoop.hbase.util.Bytes; /** - * A Store data file. Stores usually have one or more of these files. They - * are produced by flushing the memstore to disk. To - * create, instantiate a writer using {@link StoreFileWriter.Builder} - * and append data. Be sure to add any metadata before calling close on the - * Writer (Use the appendMetadata convenience methods). On close, a StoreFile - * is sitting in the Filesystem. To refer to it, create a StoreFile instance - * passing filesystem and path. To read, call {@link #initReader()} - * <p>StoreFiles may also reference store files in another Store. - * - * The reason for this weird pattern where you use a different instance for the - * writer and a reader is that we write once but read a lot more. + * An interface to describe a store data file. 
*/ -@InterfaceAudience.LimitedPrivate("Coprocessor") -public class StoreFile { - private static final Log LOG = LogFactory.getLog(StoreFile.class.getName()); +@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC) +@InterfaceStability.Evolving +public interface StoreFile { - public static final String STORE_FILE_READER_NO_READAHEAD = "hbase.store.reader.no-readahead"; - - private static final boolean DEFAULT_STORE_FILE_READER_NO_READAHEAD = false; + static final String STORE_FILE_READER_NO_READAHEAD = "hbase.store.reader.no-readahead"; // Keys for fileinfo values in HFile /** Max Sequence ID in FileInfo */ - public static final byte [] MAX_SEQ_ID_KEY = Bytes.toBytes("MAX_SEQ_ID_KEY"); + static final byte[] MAX_SEQ_ID_KEY = Bytes.toBytes("MAX_SEQ_ID_KEY"); /** Major compaction flag in FileInfo */ - public static final byte[] MAJOR_COMPACTION_KEY = - Bytes.toBytes("MAJOR_COMPACTION_KEY"); + static final byte[] MAJOR_COMPACTION_KEY = Bytes.toBytes("MAJOR_COMPACTION_KEY"); /** Minor compaction flag in FileInfo */ - public static final byte[] EXCLUDE_FROM_MINOR_COMPACTION_KEY = + static final byte[] EXCLUDE_FROM_MINOR_COMPACTION_KEY = Bytes.toBytes("EXCLUDE_FROM_MINOR_COMPACTION"); /** Bloom filter Type in FileInfo */ - public static final byte[] BLOOM_FILTER_TYPE_KEY = - Bytes.toBytes("BLOOM_FILTER_TYPE"); + static final byte[] BLOOM_FILTER_TYPE_KEY = Bytes.toBytes("BLOOM_FILTER_TYPE"); /** Delete Family Count in FileInfo */ - public static final byte[] DELETE_FAMILY_COUNT = - Bytes.toBytes("DELETE_FAMILY_COUNT"); + static final byte[] DELETE_FAMILY_COUNT = Bytes.toBytes("DELETE_FAMILY_COUNT"); /** Last Bloom filter key in FileInfo */ - public static final byte[] LAST_BLOOM_KEY = Bytes.toBytes("LAST_BLOOM_KEY"); - - /** Key for Timerange information in metadata*/ - public static final byte[] TIMERANGE_KEY = Bytes.toBytes("TIMERANGE"); - - /** Key for timestamp of earliest-put in metadata*/ - public static final byte[] EARLIEST_PUT_TS = 
Bytes.toBytes("EARLIEST_PUT_TS"); - - /** Key for the number of mob cells in metadata*/ - public static final byte[] MOB_CELLS_COUNT = Bytes.toBytes("MOB_CELLS_COUNT"); - - private final StoreFileInfo fileInfo; - private final FileSystem fs; - - // Block cache configuration and reference. - private final CacheConfig cacheConf; - - // Counter that is incremented every time a scanner is created on the - // store file. It is decremented when the scan on the store file is - // done. - private final AtomicInteger refCount = new AtomicInteger(0); - - private final boolean noReadahead; - - private final boolean primaryReplica; - - // Indicates if the file got compacted - private volatile boolean compactedAway = false; - - // Keys for metadata stored in backing HFile. - // Set when we obtain a Reader. - private long sequenceid = -1; - - // max of the MemstoreTS in the KV's in this store - // Set when we obtain a Reader. - private long maxMemstoreTS = -1; + static final byte[] LAST_BLOOM_KEY = Bytes.toBytes("LAST_BLOOM_KEY"); - // firstKey, lastkey and cellComparator will be set when openReader. - private Cell firstKey; + /** Key for Timerange information in metadata */ + static final byte[] TIMERANGE_KEY = Bytes.toBytes("TIMERANGE"); - private Cell lastKey; + /** Key for timestamp of earliest-put in metadata */ + static final byte[] EARLIEST_PUT_TS = Bytes.toBytes("EARLIEST_PUT_TS"); - private Comparator<Cell> comparator; - - CacheConfig getCacheConf() { - return cacheConf; - } - - public Cell getFirstKey() { - return firstKey; - } - - public Cell getLastKey() { - return lastKey; - } - - public Comparator<Cell> getComparator() { - return comparator; - } - - public long getMaxMemstoreTS() { - return maxMemstoreTS; - } - - public void setMaxMemstoreTS(long maxMemstoreTS) { - this.maxMemstoreTS = maxMemstoreTS; - } - - // If true, this file was product of a major compaction. Its then set - // whenever you get a Reader. 
- private AtomicBoolean majorCompaction = null; - - // If true, this file should not be included in minor compactions. - // It's set whenever you get a Reader. - private boolean excludeFromMinorCompaction = false; + /** Key for the number of mob cells in metadata */ + static final byte[] MOB_CELLS_COUNT = Bytes.toBytes("MOB_CELLS_COUNT"); /** Meta key set when store file is a result of a bulk load */ - public static final byte[] BULKLOAD_TASK_KEY = - Bytes.toBytes("BULKLOAD_SOURCE_TASK"); - public static final byte[] BULKLOAD_TIME_KEY = - Bytes.toBytes("BULKLOAD_TIMESTAMP"); + static final byte[] BULKLOAD_TASK_KEY = Bytes.toBytes("BULKLOAD_SOURCE_TASK"); + static final byte[] BULKLOAD_TIME_KEY = Bytes.toBytes("BULKLOAD_TIMESTAMP"); /** - * Map of the metadata entries in the corresponding HFile. Populated when Reader is opened - * after which it is not modified again. + * Key for skipping resetting sequence id in metadata. For bulk loaded hfiles, the scanner resets + * the cell seqId with the latest one, if this metadata is set as true, the reset is skipped. */ - private Map<byte[], byte[]> metadataMap; + static final byte[] SKIP_RESET_SEQ_ID = Bytes.toBytes("SKIP_RESET_SEQ_ID"); - // StoreFile.Reader - private volatile StoreFileReader reader; + CacheConfig getCacheConf(); - /** - * Bloom filter type specified in column family configuration. Does not - * necessarily correspond to the Bloom filter type present in the HFile. - */ - private final BloomType cfBloomType; + Cell getFirstKey(); - /** - * Key for skipping resetting sequence id in metadata. - * For bulk loaded hfiles, the scanner resets the cell seqId with the latest one, - * if this metadata is set as true, the reset is skipped. - */ - public static final byte[] SKIP_RESET_SEQ_ID = Bytes.toBytes("SKIP_RESET_SEQ_ID"); + Cell getLastKey(); - /** - * Constructor, loads a reader and it's indices, etc. May allocate a substantial amount of ram - * depending on the underlying files (10-20MB?). 
- * @param fs The current file system to use. - * @param p The path of the file. - * @param conf The current configuration. - * @param cacheConf The cache configuration and block cache reference. - * @param cfBloomType The bloom type to use for this store file as specified by column family - * configuration. This may or may not be the same as the Bloom filter type actually - * present in the HFile, because column family configuration might change. If this is - * {@link BloomType#NONE}, the existing Bloom filter is ignored. - * @deprecated Now we will specific whether the StoreFile is for primary replica when - * constructing, so please use - * {@link #StoreFile(FileSystem, Path, Configuration, CacheConfig, BloomType, boolean)} - * directly. - */ - @Deprecated - public StoreFile(final FileSystem fs, final Path p, final Configuration conf, - final CacheConfig cacheConf, final BloomType cfBloomType) throws IOException { - this(fs, new StoreFileInfo(conf, fs, p), conf, cacheConf, cfBloomType); - } + Comparator<Cell> getComparator(); - /** - * Constructor, loads a reader and it's indices, etc. May allocate a substantial amount of ram - * depending on the underlying files (10-20MB?). - * @param fs The current file system to use. - * @param p The path of the file. - * @param conf The current configuration. - * @param cacheConf The cache configuration and block cache reference. - * @param cfBloomType The bloom type to use for this store file as specified by column family - * configuration. This may or may not be the same as the Bloom filter type actually - * present in the HFile, because column family configuration might change. If this is - * {@link BloomType#NONE}, the existing Bloom filter is ignored. - * @param primaryReplica true if this is a store file for primary replica, otherwise false. 
- * @throws IOException - */ - public StoreFile(FileSystem fs, Path p, Configuration conf, CacheConfig cacheConf, - BloomType cfBloomType, boolean primaryReplica) throws IOException { - this(fs, new StoreFileInfo(conf, fs, p), conf, cacheConf, cfBloomType, primaryReplica); - } - - /** - * Constructor, loads a reader and it's indices, etc. May allocate a substantial amount of ram - * depending on the underlying files (10-20MB?). - * @param fs The current file system to use. - * @param fileInfo The store file information. - * @param conf The current configuration. - * @param cacheConf The cache configuration and block cache reference. - * @param cfBloomType The bloom type to use for this store file as specified by column family - * configuration. This may or may not be the same as the Bloom filter type actually - * present in the HFile, because column family configuration might change. If this is - * {@link BloomType#NONE}, the existing Bloom filter is ignored. - * @deprecated Now we will specific whether the StoreFile is for primary replica when - * constructing, so please use - * {@link #StoreFile(FileSystem, StoreFileInfo, Configuration, CacheConfig, BloomType, boolean)} - * directly. - */ - @Deprecated - public StoreFile(final FileSystem fs, final StoreFileInfo fileInfo, final Configuration conf, - final CacheConfig cacheConf, final BloomType cfBloomType) throws IOException { - this(fs, fileInfo, conf, cacheConf, cfBloomType, true); - } + long getMaxMemstoreTS(); /** - * Constructor, loads a reader and it's indices, etc. May allocate a substantial amount of ram - * depending on the underlying files (10-20MB?). - * @param fs fs The current file system to use. - * @param fileInfo The store file information. - * @param conf The current configuration. - * @param cacheConf The cache configuration and block cache reference. - * @param cfBloomType cfBloomType The bloom type to use for this store file as specified by column - * family configuration. 
This may or may not be the same as the Bloom filter type - * actually present in the HFile, because column family configuration might change. If - * this is {@link BloomType#NONE}, the existing Bloom filter is ignored. - * @param primaryReplica true if this is a store file for primary replica, otherwise false. + * @return the StoreFileInfo object associated to this StoreFile. null if the StoreFile is not a + * reference. */ - public StoreFile(FileSystem fs, StoreFileInfo fileInfo, Configuration conf, CacheConfig cacheConf, - BloomType cfBloomType, boolean primaryReplica) { - this.fs = fs; - this.fileInfo = fileInfo; - this.cacheConf = cacheConf; - this.noReadahead = - conf.getBoolean(STORE_FILE_READER_NO_READAHEAD, DEFAULT_STORE_FILE_READER_NO_READAHEAD); - if (BloomFilterFactory.isGeneralBloomEnabled(conf)) { - this.cfBloomType = cfBloomType; - } else { - LOG.info("Ignoring bloom filter check for file " + this.getPath() + ": " + "cfBloomType=" + - cfBloomType + " (disabled in config)"); - this.cfBloomType = BloomType.NONE; - } - this.primaryReplica = primaryReplica; - } - - /** - * @return the StoreFile object associated to this StoreFile. - * null if the StoreFile is not a reference. - */ - public StoreFileInfo getFileInfo() { - return this.fileInfo; - } + StoreFileInfo getFileInfo(); /** * @return Path or null if this StoreFile was made with a Stream. */ - public Path getPath() { - return this.fileInfo.getPath(); - } + Path getPath(); /** * @return Returns the qualified path of this StoreFile */ - public Path getQualifiedPath() { - return this.fileInfo.getPath().makeQualified(fs.getUri(), fs.getWorkingDirectory()); - } + Path getQualifiedPath(); /** - * @return True if this is a StoreFile Reference; call - * after {@link #open()} else may get wrong answer. + * @return True if this is a StoreFile Reference. */ - public boolean isReference() { - return this.fileInfo.isReference(); - } + boolean isReference(); /** * @return True if this is HFile. 
*/ - public boolean isHFile() { - return StoreFileInfo.isHFile(this.fileInfo.getPath()); - } + boolean isHFile(); /** * @return True if this file was made by a major compaction. */ - public boolean isMajorCompaction() { - if (this.majorCompaction == null) { - throw new NullPointerException("This has not been set yet"); - } - return this.majorCompaction.get(); - } + boolean isMajorCompactionResult(); /** * @return True if this file should not be part of a minor compaction. */ - public boolean excludeFromMinorCompaction() { - return this.excludeFromMinorCompaction; - } + boolean excludeFromMinorCompaction(); /** * @return This files maximum edit sequence id. */ - public long getMaxSequenceId() { - return this.sequenceid; - } + long getMaxSequenceId(); - public long getModificationTimeStamp() throws IOException { - return (fileInfo == null) ? 0 : fileInfo.getModificationTime(); - } + long getModificationTimeStamp() throws IOException; /** * Only used by the Striped Compaction Policy * @param key * @return value associated with the metadata key */ - public byte[] getMetadataValue(byte[] key) { - return metadataMap.get(key); - } - - /** - * Return the largest memstoreTS found across all storefiles in - * the given list. Store files that were created by a mapreduce - * bulk load are ignored, as they do not correspond to any specific - * put operation, and thus do not have a memstoreTS associated with them. - * @return 0 if no non-bulk-load files are provided or, this is Store that - * does not yet have any store files. - */ - public static long getMaxMemstoreTSInList(Collection<StoreFile> sfs) { - long max = 0; - for (StoreFile sf : sfs) { - if (!sf.isBulkLoadResult()) { - max = Math.max(max, sf.getMaxMemstoreTS()); - } - } - return max; - } - - /** - * Return the highest sequence ID found across all storefiles in - * the given list. - * @param sfs - * @return 0 if no non-bulk-load files are provided or, this is Store that - * does not yet have any store files. 
- */ - public static long getMaxSequenceIdInList(Collection<StoreFile> sfs) { - long max = 0; - for (StoreFile sf : sfs) { - max = Math.max(max, sf.getMaxSequenceId()); - } - return max; - } + byte[] getMetadataValue(byte[] key); /** - * Check if this storefile was created by bulk load. - * When a hfile is bulk loaded into HBase, we append - * {@code '_SeqId_<id-when-loaded>'} to the hfile name, unless - * "hbase.mapreduce.bulkload.assign.sequenceNumbers" is - * explicitly turned off. - * If "hbase.mapreduce.bulkload.assign.sequenceNumbers" - * is turned off, fall back to BULKLOAD_TIME_KEY. + * Check if this storefile was created by bulk load. When a hfile is bulk loaded into HBase, we + * append {@code '_SeqId_<id-when-loaded>'} to the hfile name, unless + * "hbase.mapreduce.bulkload.assign.sequenceNumbers" is explicitly turned off. If + * "hbase.mapreduce.bulkload.assign.sequenceNumbers" is turned off, fall back to + * BULKLOAD_TIME_KEY. * @return true if this storefile was created by bulk load. */ - public boolean isBulkLoadResult() { - boolean bulkLoadedHFile = false; - String fileName = this.getPath().getName(); - int startPos = fileName.indexOf("SeqId_"); - if (startPos != -1) { - bulkLoadedHFile = true; - } - return bulkLoadedHFile || (metadataMap != null && metadataMap.containsKey(BULKLOAD_TIME_KEY)); - } - - @VisibleForTesting - public boolean isCompactedAway() { - return compactedAway; - } - - @VisibleForTesting - public int getRefCount() { - return refCount.get(); - } + boolean isBulkLoadResult(); + + boolean isCompactedAway(); /** * @return true if the file is still used in reads */ - public boolean isReferencedInReads() { - int rc = refCount.get(); - assert rc >= 0; // we should not go negative. - return rc > 0; - } + boolean isReferencedInReads(); /** * Return the timestamp at which this bulk load file was generated. 
*/ - public long getBulkLoadTimestamp() { - byte[] bulkLoadTimestamp = metadataMap.get(BULKLOAD_TIME_KEY); - return (bulkLoadTimestamp == null) ? 0 : Bytes.toLong(bulkLoadTimestamp); - } + OptionalLong getBulkLoadTimestamp(); /** - * @return the cached value of HDFS blocks distribution. The cached value is - * calculated when store file is opened. + * @return the cached value of HDFS blocks distribution. The cached value is calculated when store + * file is opened. */ - public HDFSBlocksDistribution getHDFSBlockDistribution() { - return this.fileInfo.getHDFSBlockDistribution(); - } + HDFSBlocksDistribution getHDFSBlockDistribution(); /** - * Opens reader on this store file. Called by Constructor. - * @throws IOException - * @see #closeReader(boolean) + * Initialize the reader used for pread. */ - private void open() throws IOException { - if (this.reader != null) { - throw new IllegalAccessError("Already open"); - } - - // Open the StoreFile.Reader - this.reader = fileInfo.open(this.fs, this.cacheConf, false, noReadahead ? 0L : -1L, - primaryReplica, refCount, true); - - // Load up indices and fileinfo. This also loads Bloom filter type. - metadataMap = Collections.unmodifiableMap(this.reader.loadFileInfo()); - - // Read in our metadata. - byte [] b = metadataMap.get(MAX_SEQ_ID_KEY); - if (b != null) { - // By convention, if halfhfile, top half has a sequence number > bottom - // half. Thats why we add one in below. Its done for case the two halves - // are ever merged back together --rare. Without it, on open of store, - // since store files are distinguished by sequence id, the one half would - // subsume the other. 
- this.sequenceid = Bytes.toLong(b); - if (fileInfo.isTopReference()) { - this.sequenceid += 1; - } - } - - if (isBulkLoadResult()){ - // generate the sequenceId from the fileName - // fileName is of the form <randomName>_SeqId_<id-when-loaded>_ - String fileName = this.getPath().getName(); - // Use lastIndexOf() to get the last, most recent bulk load seqId. - int startPos = fileName.lastIndexOf("SeqId_"); - if (startPos != -1) { - this.sequenceid = Long.parseLong(fileName.substring(startPos + 6, - fileName.indexOf('_', startPos + 6))); - // Handle reference files as done above. - if (fileInfo.isTopReference()) { - this.sequenceid += 1; - } - } - // SKIP_RESET_SEQ_ID only works in bulk loaded file. - // In mob compaction, the hfile where the cells contain the path of a new mob file is bulk - // loaded to hbase, these cells have the same seqIds with the old ones. We do not want - // to reset new seqIds for them since this might make a mess of the visibility of cells that - // have the same row key but different seqIds. - boolean skipResetSeqId = isSkipResetSeqId(metadataMap.get(SKIP_RESET_SEQ_ID)); - if (skipResetSeqId) { - // increase the seqId when it is a bulk loaded file from mob compaction. - this.sequenceid += 1; - } - this.reader.setSkipResetSeqId(skipResetSeqId); - this.reader.setBulkLoaded(true); - } - this.reader.setSequenceID(this.sequenceid); - - b = metadataMap.get(HFile.Writer.MAX_MEMSTORE_TS_KEY); - if (b != null) { - this.maxMemstoreTS = Bytes.toLong(b); - } - - b = metadataMap.get(MAJOR_COMPACTION_KEY); - if (b != null) { - boolean mc = Bytes.toBoolean(b); - if (this.majorCompaction == null) { - this.majorCompaction = new AtomicBoolean(mc); - } else { - this.majorCompaction.set(mc); - } - } else { - // Presume it is not major compacted if it doesn't explicity say so - // HFileOutputFormat explicitly sets the major compacted key. 
- this.majorCompaction = new AtomicBoolean(false); - } - - b = metadataMap.get(EXCLUDE_FROM_MINOR_COMPACTION_KEY); - this.excludeFromMinorCompaction = (b != null && Bytes.toBoolean(b)); - - BloomType hfileBloomType = reader.getBloomFilterType(); - if (cfBloomType != BloomType.NONE) { - reader.loadBloomfilter(BlockType.GENERAL_BLOOM_META); - if (hfileBloomType != cfBloomType) { - LOG.info("HFile Bloom filter type for " - + reader.getHFileReader().getName() + ": " + hfileBloomType - + ", but " + cfBloomType + " specified in column family " - + "configuration"); - } - } else if (hfileBloomType != BloomType.NONE) { - LOG.info("Bloom filter turned off by CF config for " - + reader.getHFileReader().getName()); - } - - // load delete family bloom filter - reader.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META); - - try { - this.reader.timeRange = TimeRangeTracker.getTimeRange(metadataMap.get(TIMERANGE_KEY)); - } catch (IllegalArgumentException e) { - LOG.error("Error reading timestamp range data from meta -- " + - "proceeding without", e); - this.reader.timeRange = null; - } - // initialize so we can reuse them after reader closed. - firstKey = reader.getFirstKey(); - lastKey = reader.getLastKey(); - comparator = reader.getComparator(); - } + void initReader() throws IOException; /** - * Initialize the reader used for pread. + * Must be called after initReader. */ - public void initReader() throws IOException { - if (reader == null) { - try { - open(); - } catch (Exception e) { - try { - boolean evictOnClose = cacheConf != null ? 
cacheConf.shouldEvictOnClose() : true; - this.closeReader(evictOnClose); - } catch (IOException ee) { - LOG.warn("failed to close reader", ee); - } - throw e; - } - } - } - - private StoreFileReader createStreamReader(boolean canUseDropBehind) throws IOException { - initReader(); - StoreFileReader reader = fileInfo.open(this.fs, this.cacheConf, canUseDropBehind, -1L, - primaryReplica, refCount, false); - reader.copyFields(this.reader); - return reader; - } - - public StoreFileScanner getPreadScanner(boolean cacheBlocks, long readPt, long scannerOrder, - boolean canOptimizeForNonNullColumn) { - return getReader().getStoreFileScanner(cacheBlocks, true, false, readPt, scannerOrder, - canOptimizeForNonNullColumn); - } - - public StoreFileScanner getStreamScanner(boolean canUseDropBehind, boolean cacheBlocks, + StoreFileScanner getPreadScanner(boolean cacheBlocks, long readPt, long scannerOrder, + boolean canOptimizeForNonNullColumn); + + StoreFileScanner getStreamScanner(boolean canUseDropBehind, boolean cacheBlocks, boolean isCompaction, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) - throws IOException { - return createStreamReader(canUseDropBehind).getStoreFileScanner(cacheBlocks, false, - isCompaction, readPt, scannerOrder, canOptimizeForNonNullColumn); - } + throws IOException; /** - * @return Current reader. Must call initReader first else returns null. + * @return Current reader. Must call initReader first else returns null. * @see #initReader() */ - public StoreFileReader getReader() { - return this.reader; - } + StoreFileReader getReader(); /** * @param evictOnClose whether to evict blocks belonging to this file * @throws IOException */ - public synchronized void closeReader(boolean evictOnClose) - throws IOException { - if (this.reader != null) { - this.reader.close(evictOnClose); - this.reader = null; - } - } + void closeReader(boolean evictOnClose) throws IOException; /** * Marks the status of the file as compactedAway. 
*/ - public void markCompactedAway() { - this.compactedAway = true; - } + void markCompactedAway(); /** * Delete this file * @throws IOException */ - public void deleteReader() throws IOException { - boolean evictOnClose = - cacheConf != null? cacheConf.shouldEvictOnClose(): true; - closeReader(evictOnClose); - this.fs.delete(getPath(), true); - } - - @Override - public String toString() { - return this.fileInfo.toString(); - } + void deleteReader() throws IOException; /** * @return a length description of this StoreFile, suitable for debug output */ - public String toStringDetailed() { - StringBuilder sb = new StringBuilder(); - sb.append(this.getPath().toString()); - sb.append(", isReference=").append(isReference()); - sb.append(", isBulkLoadResult=").append(isBulkLoadResult()); - if (isBulkLoadResult()) { - sb.append(", bulkLoadTS=").append(getBulkLoadTimestamp()); - } else { - sb.append(", seqid=").append(getMaxSequenceId()); - } - sb.append(", majorCompaction=").append(isMajorCompaction()); - - return sb.toString(); - } - - /** - * Gets whether to skip resetting the sequence id for cells. - * @param skipResetSeqId The byte array of boolean. - * @return Whether to skip resetting the sequence id. - */ - private boolean isSkipResetSeqId(byte[] skipResetSeqId) { - if (skipResetSeqId != null && skipResetSeqId.length == 1) { - return Bytes.toBoolean(skipResetSeqId); - } - return false; - } - - /** - * @param fs - * @param dir Directory to create file in. - * @return random filename inside passed <code>dir</code> - */ - public static Path getUniqueFile(final FileSystem fs, final Path dir) - throws IOException { - if (!fs.getFileStatus(dir).isDirectory()) { - throw new IOException("Expecting " + dir.toString() + - " to be a directory"); - } - return new Path(dir, UUID.randomUUID().toString().replaceAll("-", "")); - } + String toStringDetailed(); - public Long getMinimumTimestamp() { - return getReader().timeRange == null? 
null: getReader().timeRange.getMin(); - } + OptionalLong getMinimumTimestamp(); - public Long getMaximumTimestamp() { - return getReader().timeRange == null? null: getReader().timeRange.getMax(); - } - - - /** - * Gets the approximate mid-point of this file that is optimal for use in splitting it. - * @param comparator Comparator used to compare KVs. - * @return The split point row, or null if splitting is not possible, or reader is null. - */ - byte[] getFileSplitPoint(CellComparator comparator) throws IOException { - if (this.reader == null) { - LOG.warn("Storefile " + this + " Reader is null; cannot get split point"); - return null; - } - // Get first, last, and mid keys. Midkey is the key that starts block - // in middle of hfile. Has column and timestamp. Need to return just - // the row we want to split on as midkey. - Cell midkey = this.reader.midkey(); - if (midkey != null) { - Cell firstKey = this.reader.getFirstKey(); - Cell lastKey = this.reader.getLastKey(); - // if the midkey is the same as the first or last keys, we cannot (ever) split this region. - if (comparator.compareRows(midkey, firstKey) == 0 - || comparator.compareRows(midkey, lastKey) == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug("cannot split because midkey is the same as first or last row"); - } - return null; - } - return CellUtil.cloneRow(midkey); - } - return null; - } - - /** - * Useful comparators for comparing StoreFiles. - */ - public abstract static class Comparators { - /** - * Comparator that compares based on the Sequence Ids of the - * the StoreFiles. Bulk loads that did not request a seq ID - * are given a seq id of -1; thus, they are placed before all non- - * bulk loads, and bulk loads with sequence Id. Among these files, - * the size is used to determine the ordering, then bulkLoadTime. - * If there are ties, the path name is used as a tie-breaker. 
- */ - public static final Comparator<StoreFile> SEQ_ID = - Ordering.compound(ImmutableList.of( - Ordering.natural().onResultOf(new GetSeqId()), - Ordering.natural().onResultOf(new GetFileSize()).reverse(), - Ordering.natural().onResultOf(new GetBulkTime()), - Ordering.natural().onResultOf(new GetPathName()) - )); - - /** - * Comparator for time-aware compaction. SeqId is still the first - * ordering criterion to maintain MVCC. - */ - public static final Comparator<StoreFile> SEQ_ID_MAX_TIMESTAMP = - Ordering.compound(ImmutableList.of( - Ordering.natural().onResultOf(new GetSeqId()), - Ordering.natural().onResultOf(new GetMaxTimestamp()), - Ordering.natural().onResultOf(new GetFileSize()).reverse(), - Ordering.natural().onResultOf(new GetBulkTime()), - Ordering.natural().onResultOf(new GetPathName()) - )); - - private static class GetSeqId implements Function<StoreFile, Long> { - @Override - public Long apply(StoreFile sf) { - return sf.getMaxSequenceId(); - } - } - - private static class GetFileSize implements Function<StoreFile, Long> { - @Override - public Long apply(StoreFile sf) { - if (sf.getReader() != null) { - return sf.getReader().length(); - } else { - // the reader may be null for the compacted files and if the archiving - // had failed. - return -1L; - } - } - } - - private static class GetBulkTime implements Function<StoreFile, Long> { - @Override - public Long apply(StoreFile sf) { - if (!sf.isBulkLoadResult()) return Long.MAX_VALUE; - return sf.getBulkLoadTimestamp(); - } - } - - private static class GetPathName implements Function<StoreFile, String> { - @Override - public String apply(StoreFile sf) { - return sf.getPath().getName(); - } - } - - private static class GetMaxTimestamp implements Function<StoreFile, Long> { - @Override - public Long apply(StoreFile sf) { - return sf.getMaximumTimestamp() == null? (Long)Long.MAX_VALUE : sf.getMaximumTimestamp(); - } - } - } + OptionalLong getMaximumTimestamp(); }