HIVE-12075 : add analyze command to explicitly cache file metadata in HBase metastore (Sergey Shelukhin, reviewed by Alan Gates)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7df62023 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7df62023 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7df62023 Branch: refs/heads/master Commit: 7df62023f8a328046055486de46121fd16b7458a Parents: 542eaf6 Author: Sergey Shelukhin <ser...@apache.org> Authored: Fri Dec 18 14:41:29 2015 -0800 Committer: Sergey Shelukhin <ser...@apache.org> Committed: Fri Dec 18 14:41:29 2015 -0800 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 5 +- .../test/resources/testconfiguration.properties | 1 + metastore/if/hive_metastore.thrift | 17 +- .../gen/thrift/gen-cpp/ThriftHiveMetastore.cpp | 2257 ++++++++++-------- .../gen/thrift/gen-cpp/ThriftHiveMetastore.h | 126 + .../ThriftHiveMetastore_server.skeleton.cpp | 5 + .../gen/thrift/gen-cpp/hive_metastore_types.cpp | 575 +++-- .../gen/thrift/gen-cpp/hive_metastore_types.h | 125 +- .../metastore/api/CacheFileMetadataRequest.java | 702 ++++++ .../metastore/api/CacheFileMetadataResult.java | 386 +++ .../metastore/api/FileMetadataExprType.java | 4 - .../metastore/api/PutFileMetadataRequest.java | 124 +- .../hive/metastore/api/ThriftHiveMetastore.java | 1478 +++++++++--- .../gen-php/metastore/ThriftHiveMetastore.php | 215 ++ .../src/gen/thrift/gen-php/metastore/Types.php | 242 ++ .../hive_metastore/ThriftHiveMetastore-remote | 7 + .../hive_metastore/ThriftHiveMetastore.py | 189 ++ .../gen/thrift/gen-py/hive_metastore/ttypes.py | 190 +- .../gen/thrift/gen-rb/hive_metastore_types.rb | 48 +- .../gen/thrift/gen-rb/thrift_hive_metastore.rb | 54 + .../hadoop/hive/metastore/FileFormatProxy.java | 64 + .../hive/metastore/FileMetadataHandler.java | 84 +- .../hive/metastore/FileMetadataManager.java | 129 + .../hadoop/hive/metastore/HiveMetaStore.java | 140 +- .../hive/metastore/HiveMetaStoreClient.java | 17 + 
.../hadoop/hive/metastore/IMetaStoreClient.java | 3 + .../hadoop/hive/metastore/MetaStoreUtils.java | 6 +- .../hadoop/hive/metastore/ObjectStore.java | 17 +- .../metastore/PartitionExpressionProxy.java | 36 +- .../apache/hadoop/hive/metastore/RawStore.java | 7 +- .../filemeta/OrcFileMetadataHandler.java | 24 +- .../hive/metastore/hbase/HBaseReadWrite.java | 60 +- .../hadoop/hive/metastore/hbase/HBaseStore.java | 50 +- .../hive/metastore/hbase/MetadataStore.java | 52 + .../DummyRawStoreControlledCommit.java | 8 +- .../DummyRawStoreForJdoConnection.java | 8 +- .../MockPartitionExpressionForMetastore.java | 9 +- .../hadoop/hive/metastore/TestObjectStore.java | 9 +- .../hadoop/hive/metastore/hbase/MockUtils.java | 10 +- .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 12 + .../hive/ql/io/orc/OrcFileFormatProxy.java | 74 + .../apache/hadoop/hive/ql/metadata/Hive.java | 13 + .../ppr/PartitionExpressionForMetastore.java | 54 +- .../hive/ql/parse/AnalyzeCommandUtils.java | 57 + .../ql/parse/ColumnStatsSemanticAnalyzer.java | 36 +- .../hive/ql/parse/DDLSemanticAnalyzer.java | 27 +- .../org/apache/hadoop/hive/ql/parse/HiveLexer.g | 1 + .../apache/hadoop/hive/ql/parse/HiveParser.g | 8 +- .../hive/ql/parse/SemanticAnalyzerFactory.java | 2 + .../hadoop/hive/ql/plan/CacheMetadataDesc.java | 58 + .../org/apache/hadoop/hive/ql/plan/DDLWork.java | 14 + .../hadoop/hive/ql/plan/HiveOperation.java | 1 + .../queries/clientpositive/stats_filemetadata.q | 17 + .../clientpositive/tez/stats_filemetadata.q.out | 54 + .../gen-py/hive_service/ThriftHive-remote | 21 + .../org/apache/hadoop/hive/io/HdfsUtils.java | 61 + 56 files changed, 6440 insertions(+), 1553 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git 
a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 67c4213..96a3fb5 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -200,7 +200,8 @@ public class HiveConf extends Configuration { HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_CACHE_ENTRIES, HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_MEMORY_TTL, HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_INVALIDATOR_FREQUENCY, - HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_HBASE_TTL + HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_HBASE_TTL, + HiveConf.ConfVars.METASTORE_HBASE_FILE_METADATA_THREADS }; /** @@ -443,6 +444,8 @@ public class HiveConf extends Configuration { new TimeValidator(TimeUnit.SECONDS), "Number of seconds stats entries live in HBase cache after they are created. They may be" + " invalided by updates or partition drops before this. Default is one week."), + METASTORE_HBASE_FILE_METADATA_THREADS("hive.metastore.hbase.file.metadata.threads", 1, + "Number of threads to use to read file metadata in background to cache it."), METASTORETHRIFTCONNECTIONRETRIES("hive.metastore.connect.retries", 3, "Number of retries while opening a connection to metastore"), http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 1e7dce3..2d230ef 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -434,6 +434,7 @@ minillap.query.files=bucket_map_join_tez1.q,\ mapjoin_decimal.q,\ lvj_mapjoin.q,\ llapdecider.q,\ + stats_filemetadata.q,\ mrr.q,\ orc_ppd_basic.q,\ tez_bmj_schema_evolution.q,\ 
http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/metastore/if/hive_metastore.thrift ---------------------------------------------------------------------- diff --git a/metastore/if/hive_metastore.thrift b/metastore/if/hive_metastore.thrift index bb754f1..e6403ed 100755 --- a/metastore/if/hive_metastore.thrift +++ b/metastore/if/hive_metastore.thrift @@ -756,7 +756,8 @@ struct PutFileMetadataResult { // Request type for put_file_metadata struct PutFileMetadataRequest { 1: required list<i64> fileIds, - 2: required list<binary> metadata + 2: required list<binary> metadata, + 3: optional FileMetadataExprType type } // Return type for clear_file_metadata @@ -768,6 +769,19 @@ struct ClearFileMetadataRequest { 1: required list<i64> fileIds } +// Return type for cache_file_metadata +struct CacheFileMetadataResult { + 1: required bool isSupported +} + +// Request type for cache_file_metadata +struct CacheFileMetadataRequest { + 1: required string dbName, + 2: required string tblName, + 3: optional string partName, + 4: optional bool isAllParts +} + struct GetAllFunctionsResponse { 1: optional list<Function> functions } @@ -1267,6 +1281,7 @@ service ThriftHiveMetastore extends fb303.FacebookService GetFileMetadataResult get_file_metadata(1:GetFileMetadataRequest req) PutFileMetadataResult put_file_metadata(1:PutFileMetadataRequest req) ClearFileMetadataResult clear_file_metadata(1:ClearFileMetadataRequest req) + CacheFileMetadataResult cache_file_metadata(1:CacheFileMetadataRequest req) }