http://git-wip-us.apache.org/repos/asf/hive/blob/651e7950/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java ---------------------------------------------------------------------- diff --cc standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 0000000,c2bbba5..7b32c08 mode 000000,100644..100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@@ -1,0 -1,1686 +1,1688 @@@ + /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.hadoop.hive.metastore.conf; + + import com.google.common.annotations.VisibleForTesting; + import org.apache.hadoop.conf.Configuration; + import org.apache.hadoop.hive.metastore.DefaultStorageSchemaReader; + import org.apache.hadoop.hive.metastore.HiveAlterHandler; + import org.apache.hadoop.hive.metastore.MaterializationsRebuildLockCleanerTask; + import org.apache.hadoop.hive.metastore.MetastoreTaskThread; + import org.apache.hadoop.hive.metastore.RuntimeStatsCleanerTask; + import org.apache.hadoop.hive.metastore.events.EventCleanerTask; + import org.apache.hadoop.hive.metastore.security.MetastoreDelegationTokenManager; + import org.apache.hadoop.hive.metastore.txn.AcidCompactionHistoryService; + import org.apache.hadoop.hive.metastore.txn.AcidHouseKeeperService; + import org.apache.hadoop.hive.metastore.txn.AcidOpenTxnsCounterService; + import org.apache.hadoop.hive.metastore.txn.AcidWriteSetService; + import org.apache.hadoop.hive.metastore.utils.StringUtils; + import org.slf4j.Logger; + import org.slf4j.LoggerFactory; + + import java.io.File; + import java.io.IOException; + import java.net.URI; + import java.net.URL; + import java.util.Arrays; + import java.util.Collection; + import java.util.Collections; + import java.util.HashMap; + import java.util.List; + import java.util.Map; + import java.util.Set; + import java.util.concurrent.TimeUnit; + import java.util.concurrent.atomic.AtomicBoolean; + import java.util.regex.Matcher; + import java.util.regex.Pattern; + + /** + * A set of definitions of config values used by the Metastore. One of the key aims of this + * class is to provide backwards compatibility with existing Hive configuration keys while + * allowing the metastore to have its own, Hive independent keys. 
For this reason access to the + * underlying Configuration object should always be done via the static methods provided here + * rather than directly via {@link Configuration#get(String)} and + * {@link Configuration#set(String, String)}. All the methods of this class will handle checking + * both the MetastoreConf key and the Hive key. The algorithm is, on reads, to check first the + * MetastoreConf key, then the Hive key, then return the default if neither are set. On write + * the Metastore key only is set. + * + * This class does not extend Configuration. Rather it provides static methods for operating on + * a Configuration object. This allows it to work on HiveConf objects, which otherwise would not + * be the case. + */ + public class MetastoreConf { + + private static final Logger LOG = LoggerFactory.getLogger(MetastoreConf.class); + /** One or more digits followed by one or more ASCII letters; group 1 is the number, group 2 the letter suffix. */ + private static final Pattern TIME_UNIT_SUFFIX = Pattern.compile("([0-9]+)([a-zA-Z]+)"); + + /** Lookup table read by getMetaConf(String); filled by the static initializer from metaConfVars. */ + private static final Map<String, ConfVars> metaConfs = new HashMap<>(); + private static URL hiveDefaultURL = null; + private static URL hiveSiteURL = null; + private static URL hiveMetastoreSiteURL = null; + private static URL metastoreSiteURL = null; + private static AtomicBoolean beenDumped = new AtomicBoolean(); + + private static Map<String, ConfVars> keyToVars; + + @VisibleForTesting + static final String TEST_ENV_WORKAROUND = "metastore.testing.env.workaround.dont.ever.set.this."; + + /** + * Stats update modes: NONE, EXISTING or ALL. + * NOTE(review): the meaning of each mode is defined by code not visible in this part of the file. + */ + public static enum StatsUpdateMode { + NONE, EXISTING, ALL + } + + /** + * Immutable pair of a numeric amount and the TimeUnit that amount is expressed in. + */ + private static class TimeValue { + final long val; + final TimeUnit unit; + + private TimeValue(long val, TimeUnit unit) { + this.val = val; + this.unit = unit; + } + + /** + * Renders the amount followed by a short unit suffix: ns, us, ms, s, m, h or d. + * @return the amount and suffix, e.g. "5s" for five SECONDS + * @throws RuntimeException if the unit matches none of the switch cases + */ + @Override + public String toString() { + switch (unit) { + case NANOSECONDS: return Long.toString(val) + "ns"; + case MICROSECONDS: return Long.toString(val) + "us"; + case MILLISECONDS: return Long.toString(val) + "ms"; + case SECONDS: return Long.toString(val) + "s"; + case MINUTES: return Long.toString(val) + "m"; + 
case HOURS: return Long.toString(val) + "h"; + case DAYS: return Long.toString(val) + "d"; + } + /* Guard: reached only when unit matched none of the cases above. */ + throw new RuntimeException("Unknown time unit " + unit); + } + } + + /** + * Metastore related options that the db is initialized against. When a conf + * var in this list is changed, the metastore instance for the CLI will + * be recreated so that the change will take effect. + * TODO - I suspect the vast majority of these don't need to be here. But it requires testing + * before just pulling them out. + */ + public static final MetastoreConf.ConfVars[] metaVars = { + ConfVars.WAREHOUSE, + ConfVars.REPLDIR, + ConfVars.THRIFT_URIS, + ConfVars.SERVER_PORT, + ConfVars.THRIFT_CONNECTION_RETRIES, + ConfVars.THRIFT_FAILURE_RETRIES, + ConfVars.CLIENT_CONNECT_RETRY_DELAY, + ConfVars.CLIENT_SOCKET_TIMEOUT, + ConfVars.CLIENT_SOCKET_LIFETIME, + ConfVars.PWD, + ConfVars.CONNECT_URL_HOOK, + ConfVars.CONNECT_URL_KEY, + ConfVars.SERVER_MIN_THREADS, + ConfVars.SERVER_MAX_THREADS, + ConfVars.TCP_KEEP_ALIVE, + ConfVars.KERBEROS_KEYTAB_FILE, + ConfVars.KERBEROS_PRINCIPAL, + ConfVars.USE_THRIFT_SASL, + ConfVars.TOKEN_SIGNATURE, + ConfVars.CACHE_PINOBJTYPES, + ConfVars.CONNECTION_POOLING_TYPE, + ConfVars.VALIDATE_TABLES, + ConfVars.DATANUCLEUS_INIT_COL_INFO, + ConfVars.VALIDATE_COLUMNS, + ConfVars.VALIDATE_CONSTRAINTS, + ConfVars.STORE_MANAGER_TYPE, + ConfVars.AUTO_CREATE_ALL, + ConfVars.DATANUCLEUS_TRANSACTION_ISOLATION, + ConfVars.DATANUCLEUS_CACHE_LEVEL2, + ConfVars.DATANUCLEUS_CACHE_LEVEL2_TYPE, + ConfVars.IDENTIFIER_FACTORY, + ConfVars.DATANUCLEUS_PLUGIN_REGISTRY_BUNDLE_CHECK, + ConfVars.AUTHORIZATION_STORAGE_AUTH_CHECKS, + ConfVars.BATCH_RETRIEVE_MAX, + ConfVars.EVENT_LISTENERS, + ConfVars.TRANSACTIONAL_EVENT_LISTENERS, + ConfVars.EVENT_CLEAN_FREQ, + ConfVars.EVENT_EXPIRY_DURATION, + ConfVars.EVENT_MESSAGE_FACTORY, + ConfVars.FILTER_HOOK, + ConfVars.RAW_STORE_IMPL, + ConfVars.END_FUNCTION_LISTENERS, + ConfVars.PART_INHERIT_TBL_PROPS, + ConfVars.BATCH_RETRIEVE_OBJECTS_MAX, + 
ConfVars.INIT_HOOKS, + ConfVars.PRE_EVENT_LISTENERS, + ConfVars.HMS_HANDLER_ATTEMPTS, + ConfVars.HMS_HANDLER_INTERVAL, + ConfVars.HMS_HANDLER_FORCE_RELOAD_CONF, + ConfVars.PARTITION_NAME_WHITELIST_PATTERN, + ConfVars.ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS, + ConfVars.USERS_IN_ADMIN_ROLE, + ConfVars.HIVE_TXN_MANAGER, + ConfVars.TXN_TIMEOUT, + ConfVars.TXN_MAX_OPEN_BATCH, + ConfVars.TXN_RETRYABLE_SQLEX_REGEX, + ConfVars.STATS_NDV_TUNER, + ConfVars.STATS_NDV_DENSITY_FUNCTION, + ConfVars.AGGREGATE_STATS_CACHE_ENABLED, + ConfVars.AGGREGATE_STATS_CACHE_SIZE, + ConfVars.AGGREGATE_STATS_CACHE_MAX_PARTITIONS, + ConfVars.AGGREGATE_STATS_CACHE_FPP, + ConfVars.AGGREGATE_STATS_CACHE_MAX_VARIANCE, + ConfVars.AGGREGATE_STATS_CACHE_TTL, + ConfVars.AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT, + ConfVars.AGGREGATE_STATS_CACHE_MAX_READER_WAIT, + ConfVars.AGGREGATE_STATS_CACHE_MAX_FULL, + ConfVars.AGGREGATE_STATS_CACHE_CLEAN_UNTIL, + ConfVars.DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES, + ConfVars.FILE_METADATA_THREADS + }; + + /** + * User configurable Metastore vars. Every entry is registered in metaConfs by the + * static initializer that follows, which makes it resolvable through getMetaConf(String). + */ + private static final MetastoreConf.ConfVars[] metaConfVars = { + ConfVars.TRY_DIRECT_SQL, + ConfVars.TRY_DIRECT_SQL_DDL, + ConfVars.CLIENT_SOCKET_TIMEOUT, + ConfVars.PARTITION_NAME_WHITELIST_PATTERN, + ConfVars.CAPABILITY_CHECK, + ConfVars.DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES + }; + + /* Register each user configurable var twice, once under its varname and once under its hiveName, so a lookup by either key finds it. */ + static { + for (ConfVars confVar : metaConfVars) { + metaConfs.put(confVar.varname, confVar); + metaConfs.put(confVar.hiveName, confVar); + } + } + + /** + * Variables that we should never print the value of for security reasons. 
+ */ + private static final Set<String> unprintables = StringUtils.asSet( + ConfVars.PWD.varname, + ConfVars.PWD.hiveName, + ConfVars.SSL_KEYSTORE_PASSWORD.varname, + ConfVars.SSL_KEYSTORE_PASSWORD.hiveName, + ConfVars.SSL_TRUSTSTORE_PASSWORD.varname, + ConfVars.SSL_TRUSTSTORE_PASSWORD.hiveName + ); + + /** + * Looks up a user configurable variable by key. + * @param name key to resolve; the table holds the varname and the hiveName of every metaConfVars entry + * @return the ConfVars registered under that key, or null when the key is not registered + */ + public static ConfVars getMetaConf(String name) { + return metaConfs.get(name); + } + + public enum ConfVars { + // alpha order, PLEASE! + ADDED_JARS("metastore.added.jars.path", "hive.added.jars.path", "", + "This an internal parameter."), + AGGREGATE_STATS_CACHE_CLEAN_UNTIL("metastore.aggregate.stats.cache.clean.until", + "hive.metastore.aggregate.stats.cache.clean.until", 0.8, + "The cleaner thread cleans until cache reaches this % full size."), + AGGREGATE_STATS_CACHE_ENABLED("metastore.aggregate.stats.cache.enabled", + "hive.metastore.aggregate.stats.cache.enabled", true, + "Whether aggregate stats caching is enabled or not."), + AGGREGATE_STATS_CACHE_FPP("metastore.aggregate.stats.cache.fpp", + "hive.metastore.aggregate.stats.cache.fpp", 0.01, + "Maximum false positive probability for the Bloom Filter used in each aggregate stats cache node (default 1%)."), + AGGREGATE_STATS_CACHE_MAX_FULL("metastore.aggregate.stats.cache.max.full", + "hive.metastore.aggregate.stats.cache.max.full", 0.9, + "Maximum cache full % after which the cache cleaner thread kicks in."), + AGGREGATE_STATS_CACHE_MAX_PARTITIONS("metastore.aggregate.stats.cache.max.partitions", + "hive.metastore.aggregate.stats.cache.max.partitions", 10000, + "Maximum number of partitions that are aggregated per cache node."), + AGGREGATE_STATS_CACHE_MAX_READER_WAIT("metastore.aggregate.stats.cache.max.reader.wait", + "hive.metastore.aggregate.stats.cache.max.reader.wait", 1000, TimeUnit.MILLISECONDS, + "Number of milliseconds a reader will wait to acquire the readlock before giving up."), + AGGREGATE_STATS_CACHE_MAX_VARIANCE("metastore.aggregate.stats.cache.max.variance", + 
"hive.metastore.aggregate.stats.cache.max.variance", 0.01, + "Maximum tolerable variance in number of partitions between a cached node and our request (default 1%)."), + AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT("metastore.aggregate.stats.cache.max.writer.wait", + "hive.metastore.aggregate.stats.cache.max.writer.wait", 5000, TimeUnit.MILLISECONDS, + "Number of milliseconds a writer will wait to acquire the writelock before giving up."), + AGGREGATE_STATS_CACHE_SIZE("metastore.aggregate.stats.cache.size", + "hive.metastore.aggregate.stats.cache.size", 10000, + "Maximum number of aggregate stats nodes that we will place in the metastore aggregate stats cache."), + AGGREGATE_STATS_CACHE_TTL("metastore.aggregate.stats.cache.ttl", + "hive.metastore.aggregate.stats.cache.ttl", 600, TimeUnit.SECONDS, + "Number of seconds for a cached node to be active in the cache before they become stale."), + ALTER_HANDLER("metastore.alter.handler", "hive.metastore.alter.impl", + HiveAlterHandler.class.getName(), + "Alter handler. For now defaults to the Hive one. Really need a better default option"), + ASYNC_LOG_ENABLED("metastore.async.log.enabled", "hive.async.log.enabled", true, + "Whether to enable Log4j2's asynchronous logging. 
Asynchronous logging can give\n" + + " significant performance improvement as logging will be handled in separate thread\n" + + " that uses LMAX disruptor queue for buffering log messages.\n" + + " Refer https://logging.apache.org/log4j/2.x/manual/async.html for benefits and\n" + + " drawbacks."), + AUTHORIZATION_STORAGE_AUTH_CHECKS("metastore.authorization.storage.checks", + "hive.metastore.authorization.storage.checks", false, + "Should the metastore do authorization checks against the underlying storage (usually hdfs) \n" + + "for operations like drop-partition (disallow the drop-partition if the user in\n" + + "question doesn't have permissions to delete the corresponding directory\n" + + "on the storage)."), + /* DataNucleus property: the metastore key and the Hive key are the same string. The description fragments are concatenated, so each must end with its own separating space. */ + AUTO_CREATE_ALL("datanucleus.schema.autoCreateAll", "datanucleus.schema.autoCreateAll", false, + "Auto creates necessary schema on a startup if one doesn't exist. Set this to false, after creating it once. " + + "To enable auto create also set hive.metastore.schema.verification=false. Auto creation is not " + + "recommended for production use cases, run schematool command instead." ), + BATCH_RETRIEVE_MAX("metastore.batch.retrieve.max", "hive.metastore.batch.retrieve.max", 300, + "Maximum number of objects (tables/partitions) can be retrieved from metastore in one batch. 
\n" + + "The higher the number, the less the number of round trips is needed to the Hive metastore server, \n" + + "but it may also cause higher memory requirement at the client side."), + BATCH_RETRIEVE_OBJECTS_MAX("metastore.batch.retrieve.table.partition.max", + "hive.metastore.batch.retrieve.table.partition.max", 1000, + "Maximum number of objects that metastore internally retrieves in one batch."), + CACHE_PINOBJTYPES("metastore.cache.pinobjtypes", "hive.metastore.cache.pinobjtypes", + "Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order", + "List of comma separated metastore object types that should be pinned in the cache"), + CACHED_RAW_STORE_IMPL("metastore.cached.rawstore.impl", + "hive.metastore.cached.rawstore.impl", "org.apache.hadoop.hive.metastore.ObjectStore", + "Name of the wrapped RawStore class"), + CACHED_RAW_STORE_CACHE_UPDATE_FREQUENCY("metastore.cached.rawstore.cache.update.frequency", + "hive.metastore.cached.rawstore.cache.update.frequency", 60, TimeUnit.SECONDS, + "The time after which metastore cache is updated from metastore DB."), + CACHED_RAW_STORE_CACHED_OBJECTS_WHITELIST("metastore.cached.rawstore.cached.object.whitelist", + "hive.metastore.cached.rawstore.cached.object.whitelist", ".*", "Comma separated list of regular expressions \n " + + "to select the tables (and its partitions, stats etc) that will be cached by CachedStore. \n" + + "This can be used in conjunction with hive.metastore.cached.rawstore.cached.object.blacklist. \n" + + "Example: .*, db1.*, db2\\.tbl.*. The last item can potentially override patterns specified before."), + CACHED_RAW_STORE_CACHED_OBJECTS_BLACKLIST("metastore.cached.rawstore.cached.object.blacklist", + "hive.metastore.cached.rawstore.cached.object.blacklist", "", "Comma separated list of regular expressions \n " + + "to filter out the tables (and its partitions, stats etc) that will be cached by CachedStore. 
\n" + + "This can be used in conjunction with hive.metastore.cached.rawstore.cached.object.whitelist. \n" + + "Example: db2.*, db3\\.tbl1, db3\\..*. The last item can potentially override patterns specified before. \n" + + "The blacklist also overrides the whitelist."), + CACHED_RAW_STORE_MAX_CACHE_MEMORY("metastore.cached.rawstore.max.cache.memory", + "hive.metastore.cached.rawstore.max.cache.memory", "1Gb", new SizeValidator(), + "The maximum memory in bytes that the cached objects can use. " + + "Memory used is calculated based on estimated size of tables and partitions in the cache. " + + "Setting it to a negative value disables memory estimation."), + CAPABILITY_CHECK("metastore.client.capability.check", + "hive.metastore.client.capability.check", true, + "Whether to check client capabilities for potentially breaking API usage."), + CATALOG_DEFAULT("metastore.catalog.default", "metastore.catalog.default", "hive", + "The default catalog to use when a catalog is not specified. Default is 'hive' (the " + + "default catalog)."), + CATALOGS_TO_CACHE("metastore.cached.rawstore.catalogs", "metastore.cached.rawstore.catalogs", + "hive", "Comma separated list of catalogs to cache in the CachedStore. Default is 'hive' " + + "(the default catalog). Empty string means all catalogs will be cached."), + CLIENT_CONNECT_RETRY_DELAY("metastore.client.connect.retry.delay", + "hive.metastore.client.connect.retry.delay", 1, TimeUnit.SECONDS, + "Number of seconds for the client to wait between consecutive connection attempts"), + CLIENT_KERBEROS_PRINCIPAL("metastore.client.kerberos.principal", + "hive.metastore.client.kerberos.principal", + "", // E.g. "hive-metastore/_HOST@EXAMPLE.COM". + "The Kerberos principal associated with the HA cluster of hcat_servers."), + CLIENT_SOCKET_LIFETIME("metastore.client.socket.lifetime", + "hive.metastore.client.socket.lifetime", 0, TimeUnit.SECONDS, + "MetaStore Client socket lifetime in seconds. 
After this time is exceeded, client\n" + + "reconnects on the next MetaStore operation. A value of 0s means the connection\n" + + "has an infinite lifetime."), + CLIENT_SOCKET_TIMEOUT("metastore.client.socket.timeout", "hive.metastore.client.socket.timeout", 600, + TimeUnit.SECONDS, "MetaStore Client socket timeout in seconds"), + COMPACTOR_HISTORY_REAPER_INTERVAL("metastore.compactor.history.reaper.interval", + "hive.compactor.history.reaper.interval", 2, TimeUnit.MINUTES, + "Determines how often compaction history reaper runs"), + COMPACTOR_HISTORY_RETENTION_ATTEMPTED("metastore.compactor.history.retention.attempted", + "hive.compactor.history.retention.attempted", 2, + new RangeValidator(0, 100), "Determines how many attempted compaction records will be " + + "retained in compaction history for a given table/partition."), + COMPACTOR_HISTORY_RETENTION_FAILED("metastore.compactor.history.retention.failed", + "hive.compactor.history.retention.failed", 3, + new RangeValidator(0, 100), "Determines how many failed compaction records will be " + + "retained in compaction history for a given table/partition."), + COMPACTOR_HISTORY_RETENTION_SUCCEEDED("metastore.compactor.history.retention.succeeded", + "hive.compactor.history.retention.succeeded", 3, + new RangeValidator(0, 100), "Determines how many successful compaction records will be " + + "retained in compaction history for a given table/partition."), + COMPACTOR_INITIATOR_FAILED_THRESHOLD("metastore.compactor.initiator.failed.compacts.threshold", + "hive.compactor.initiator.failed.compacts.threshold", 2, + new RangeValidator(1, 20), "Number of consecutive compaction failures (per table/partition) " + + "after which automatic compactions will not be scheduled any more. 
Note that this must be less " + + "than hive.compactor.history.retention.failed."), + COMPACTOR_INITIATOR_ON("metastore.compactor.initiator.on", "hive.compactor.initiator.on", false, + "Whether to run the initiator and cleaner threads on this metastore instance or not.\n" + + "Set this to true on one instance of the Thrift metastore service as part of turning\n" + + "on Hive transactions. For a complete list of parameters required for turning on\n" + + "transactions, see hive.txn.manager."), + COMPACTOR_WORKER_THREADS("metastore.compactor.worker.threads", + "hive.compactor.worker.threads", 0, + "How many compactor worker threads to run on this metastore instance. Set this to a\n" + + "positive number on one or more instances of the Thrift metastore service as part of\n" + + "turning on Hive transactions. For a complete list of parameters required for turning\n" + + "on transactions, see hive.txn.manager.\n" + + "Worker threads spawn MapReduce jobs to do compactions. They do not do the compactions\n" + + "themselves. Increasing the number of worker threads will decrease the time it takes\n" + + "tables or partitions to be compacted once they are determined to need compaction.\n" + + "It will also increase the background load on the Hadoop cluster as more MapReduce jobs\n" + + "will be running in the background."), + CONNECTION_DRIVER("javax.jdo.option.ConnectionDriverName", + "javax.jdo.option.ConnectionDriverName", "org.apache.derby.jdbc.EmbeddedDriver", + "Driver class name for a JDBC metastore"), + CONNECTION_POOLING_MAX_CONNECTIONS("datanucleus.connectionPool.maxPoolSize", + "datanucleus.connectionPool.maxPoolSize", 10, + "Specify the maximum number of connections in the connection pool. Note: The configured size will be used by\n" + + "2 connection pools (TxnHandler and ObjectStore). 
When configuring the max connection pool size, it is\n" + + "recommended to take into account the number of metastore instances and the number of HiveServer2 instances\n" + + "configured with embedded metastore. To get optimal performance, set config to meet the following condition\n"+ + "(2 * pool_size * metastore_instances + 2 * pool_size * HS2_instances_with_embedded_metastore) = \n" + + "(2 * physical_core_count + hard_disk_count)."), + CONNECT_URL_HOOK("metastore.ds.connection.url.hook", + "hive.metastore.ds.connection.url.hook", "", + "Name of the hook to use for retrieving the JDO connection URL. If empty, the value in javax.jdo.option.ConnectionURL is used"), + CONNECT_URL_KEY("javax.jdo.option.ConnectionURL", + "javax.jdo.option.ConnectionURL", + "jdbc:derby:;databaseName=metastore_db;create=true", + "JDBC connect string for a JDBC metastore.\n" + + "To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.\n" + + "For example, jdbc:postgresql://myhost/db?ssl=true for postgres database."), + CONNECTION_POOLING_TYPE("datanucleus.connectionPoolingType", + "datanucleus.connectionPoolingType", "HikariCP", new StringSetValidator("BONECP", "DBCP", + "HikariCP", "NONE"), + "Specify connection pool library for datanucleus"), + CONNECTION_USER_NAME("javax.jdo.option.ConnectionUserName", + "javax.jdo.option.ConnectionUserName", "APP", + "Username to use against metastore database"), + CREATE_TABLES_AS_ACID("metastore.create.as.acid", "hive.create.as.acid", false, + "Whether the eligible tables should be created as full ACID by default. 
Does \n" + + "not apply to external tables, the ones using storage handlers, etc."), + COUNT_OPEN_TXNS_INTERVAL("metastore.count.open.txns.interval", "hive.count.open.txns.interval", + 1, TimeUnit.SECONDS, "Time in seconds between checks to count open transactions."), + DATANUCLEUS_AUTOSTART("datanucleus.autoStartMechanismMode", + "datanucleus.autoStartMechanismMode", "ignored", new StringSetValidator("ignored"), + "Autostart mechanism for datanucleus. Currently ignored is the only option supported."), + DATANUCLEUS_CACHE_LEVEL2("datanucleus.cache.level2", "datanucleus.cache.level2", false, + "Use a level 2 cache. Turn this off if metadata is changed independently of Hive metastore server"), + DATANUCLEUS_CACHE_LEVEL2_TYPE("datanucleus.cache.level2.type", + "datanucleus.cache.level2.type", "none", ""), + DATANUCLEUS_INIT_COL_INFO("datanucleus.rdbms.initializeColumnInfo", + "datanucleus.rdbms.initializeColumnInfo", "NONE", + "initializeColumnInfo setting for DataNucleus; set to NONE at least on Postgres."), + DATANUCLEUS_PLUGIN_REGISTRY_BUNDLE_CHECK("datanucleus.plugin.pluginRegistryBundleCheck", + "datanucleus.plugin.pluginRegistryBundleCheck", "LOG", + "Defines what happens when plugin bundles are found and are duplicated [EXCEPTION|LOG|NONE]"), + DATANUCLEUS_TRANSACTION_ISOLATION("datanucleus.transactionIsolation", + "datanucleus.transactionIsolation", "read-committed", + "Default transaction isolation level for identity generation."), + DATANUCLEUS_USE_LEGACY_VALUE_STRATEGY("datanucleus.rdbms.useLegacyNativeValueStrategy", + "datanucleus.rdbms.useLegacyNativeValueStrategy", true, ""), + DBACCESS_SSL_PROPS("metastore.dbaccess.ssl.properties", "hive.metastore.dbaccess.ssl.properties", "", + "Comma-separated SSL properties for metastore to access database when JDO connection URL\n" + + "enables SSL access. e.g. 
javax.net.ssl.trustStore=/tmp/truststore,javax.net.ssl.trustStorePassword=pwd."), + DEFAULTPARTITIONNAME("metastore.default.partition.name", + "hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__", + "The default partition name in case the dynamic partition column value is null/empty string or any other values that cannot be escaped. \n" + + "This value must not contain any special character used in HDFS URI (e.g., ':', '%', '/' etc). \n" + + "The user has to be aware that the dynamic partition value should not contain this value to avoid confusions."), + DELEGATION_KEY_UPDATE_INTERVAL("metastore.cluster.delegation.key.update-interval", + "hive.cluster.delegation.key.update-interval", 1, TimeUnit.DAYS, ""), + DELEGATION_TOKEN_GC_INTERVAL("metastore.cluster.delegation.token.gc-interval", + "hive.cluster.delegation.token.gc-interval", 1, TimeUnit.HOURS, ""), + DELEGATION_TOKEN_MAX_LIFETIME("metastore.cluster.delegation.token.max-lifetime", + "hive.cluster.delegation.token.max-lifetime", 7, TimeUnit.DAYS, ""), + DELEGATION_TOKEN_RENEW_INTERVAL("metastore.cluster.delegation.token.renew-interval", + "hive.cluster.delegation.token.renew-interval", 1, TimeUnit.DAYS, ""), + DELEGATION_TOKEN_STORE_CLS("metastore.cluster.delegation.token.store.class", + "hive.cluster.delegation.token.store.class", MetastoreDelegationTokenManager.class.getName(), + "Class to store delegation tokens"), + DETACH_ALL_ON_COMMIT("javax.jdo.option.DetachAllOnCommit", + "javax.jdo.option.DetachAllOnCommit", true, + "Detaches all objects from session so that they can be used after transaction is committed"), + DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE("metastore.direct.sql.max.elements.in.clause", + "hive.direct.sql.max.elements.in.clause", 1000, + "The maximum number of values in a IN clause. 
Once exceeded, it will be broken into\n" + + " multiple OR separated IN clauses."), + DIRECT_SQL_MAX_ELEMENTS_VALUES_CLAUSE("metastore.direct.sql.max.elements.values.clause", + "hive.direct.sql.max.elements.values.clause", + 1000, "The maximum number of values in a VALUES clause for INSERT statement."), + DIRECT_SQL_MAX_QUERY_LENGTH("metastore.direct.sql.max.query.length", + "hive.direct.sql.max.query.length", 100, "The maximum\n" + + " size of a query string (in KB)."), + DIRECT_SQL_PARTITION_BATCH_SIZE("metastore.direct.sql.batch.size", + "hive.metastore.direct.sql.batch.size", 0, + "Batch size for partition and other object retrieval from the underlying DB in direct\n" + + "SQL. For some DBs like Oracle and MSSQL, there are hardcoded or perf-based limitations\n" + + "that necessitate this. For DBs that can handle the queries, this isn't necessary and\n" + + "may impede performance. -1 means no batching, 0 means automatic batching."), + DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES("metastore.disallow.incompatible.col.type.changes", + "hive.metastore.disallow.incompatible.col.type.changes", true, + "If true, ALTER TABLE operations which change the type of a\n" + + "column (say STRING) to an incompatible type (say MAP) are disallowed.\n" + + "RCFile default SerDe (ColumnarSerDe) serializes the values in such a way that the\n" + + "datatypes can be converted from string to any type. The map is also serialized as\n" + + "a string, which can be read as a string as well. However, with any binary\n" + + "serialization, this is not true. 
Blocking the ALTER TABLE prevents ClassCastExceptions\n" + + "when subsequently trying to access old partitions.\n" + + "\n" + + "Primitive types like INT, STRING, BIGINT, etc., are compatible with each other and are\n" + + "not blocked.\n" + + "\n" + + "See HIVE-4409 for more details."), + DUMP_CONFIG_ON_CREATION("metastore.dump.config.on.creation", "metastore.dump.config.on.creation", true, + "If true, a printout of the config file (minus sensitive values) will be dumped to the " + + "log whenever newMetastoreConf() is called. Can produce a lot of logs"), + END_FUNCTION_LISTENERS("metastore.end.function.listeners", + "hive.metastore.end.function.listeners", "", + "List of comma separated listeners for the end of metastore functions."), + EVENT_CLEAN_FREQ("metastore.event.clean.freq", "hive.metastore.event.clean.freq", 0, + TimeUnit.SECONDS, "Frequency at which timer task runs to purge expired events in metastore."), + EVENT_EXPIRY_DURATION("metastore.event.expiry.duration", "hive.metastore.event.expiry.duration", + 0, TimeUnit.SECONDS, "Duration after which events expire from events table"), + EVENT_LISTENERS("metastore.event.listeners", "hive.metastore.event.listeners", "", + "A comma separated list of Java classes that implement the org.apache.riven.MetaStoreEventListener" + + " interface. The metastore event and corresponding listener method will be invoked in separate JDO transactions. 
" + + "Alternatively, configure hive.metastore.transactional.event.listeners to ensure both are invoked in same JDO transaction."), + EVENT_MESSAGE_FACTORY("metastore.event.message.factory", + "hive.metastore.event.message.factory", + "org.apache.hadoop.hive.metastore.messaging.json.JSONMessageFactory", + "Factory class for making encoding and decoding messages in the events generated."), + EVENT_DB_LISTENER_TTL("metastore.event.db.listener.timetolive", + "hive.metastore.event.db.listener.timetolive", 86400, TimeUnit.SECONDS, + "time after which events will be removed from the database listener queue"), + EVENT_DB_NOTIFICATION_API_AUTH("metastore.metastore.event.db.notification.api.auth", + "hive.metastore.event.db.notification.api.auth", true, + "Should metastore do authorization against database notification related APIs such as get_next_notification.\n" + + "If set to true, then only the superusers in proxy settings have the permission"), + EXECUTE_SET_UGI("metastore.execute.setugi", "hive.metastore.execute.setugi", true, + "In unsecure mode, setting this property to true will cause the metastore to execute DFS operations using \n" + + "the client's reported user and group permissions. Note that this property must be set on \n" + + "both the client and server sides. Further note that its best effort. 
\n" + + "If client sets its to true and server sets it to false, client setting will be ignored."), + EXPRESSION_PROXY_CLASS("metastore.expression.proxy", "hive.metastore.expression.proxy", + "org.apache.hadoop.hive.ql.optimizer.ppr.PartitionExpressionForMetastore", + "Class to use to process expressions in partition pruning."), + FILE_METADATA_THREADS("metastore.file.metadata.threads", + "hive.metastore.hbase.file.metadata.threads", 1, + "Number of threads to use to read file metadata in background to cache it."), + FILTER_HOOK("metastore.filter.hook", "hive.metastore.filter.hook", + org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl.class.getName(), + "Metastore hook class for filtering the metadata read results. If hive.security.authorization.manager" + + "is set to instance of HiveAuthorizerFactory, then this value is ignored."), + FS_HANDLER_CLS("metastore.fs.handler.class", "hive.metastore.fs.handler.class", + "org.apache.hadoop.hive.metastore.HiveMetaStoreFsImpl", ""), + FS_HANDLER_THREADS_COUNT("metastore.fshandler.threads", "hive.metastore.fshandler.threads", 15, + "Number of threads to be allocated for metastore handler for fs operations."), + HMS_HANDLER_ATTEMPTS("metastore.hmshandler.retry.attempts", "hive.hmshandler.retry.attempts", 10, + "The number of times to retry a HMSHandler call if there were a connection error."), + HMS_HANDLER_FORCE_RELOAD_CONF("metastore.hmshandler.force.reload.conf", + "hive.hmshandler.force.reload.conf", false, + "Whether to force reloading of the HMSHandler configuration (including\n" + + "the connection URL, before the next metastore query that accesses the\n" + + "datastore. Once reloaded, this value is reset to false. 
Used for\n" + + "testing only."), + HMS_HANDLER_INTERVAL("metastore.hmshandler.retry.interval", "hive.hmshandler.retry.interval", + 2000, TimeUnit.MILLISECONDS, "The time between HMSHandler retry attempts on failure."), + IDENTIFIER_FACTORY("datanucleus.identifierFactory", + "datanucleus.identifierFactory", "datanucleus1", + "Name of the identifier factory to use when generating table/column names etc. \n" + + "'datanucleus1' is used for backward compatibility with DataNucleus v1"), + INIT_HOOKS("metastore.init.hooks", "hive.metastore.init.hooks", "", + "A comma separated list of hooks to be invoked at the beginning of HMSHandler initialization. \n" + + "An init hook is specified as the name of Java class which extends org.apache.riven.MetaStoreInitListener."), + INIT_METADATA_COUNT_ENABLED("metastore.initial.metadata.count.enabled", + "hive.metastore.initial.metadata.count.enabled", true, + "Enable a metadata count at metastore startup for metrics."), + INTEGER_JDO_PUSHDOWN("metastore.integral.jdo.pushdown", + "hive.metastore.integral.jdo.pushdown", false, + "Allow JDO query pushdown for integral partition columns in metastore. Off by default. This\n" + + "improves metastore perf for integral columns, especially if there's a large number of partitions.\n" + + "However, it doesn't work correctly with integral values that are not normalized (e.g. have\n" + + "leading zeroes, like 0012). If metastore direct SQL is enabled and works, this optimization\n" + + "is also irrelevant."), + KERBEROS_KEYTAB_FILE("metastore.kerberos.keytab.file", + "hive.metastore.kerberos.keytab.file", "", + "The path to the Kerberos Keytab file containing the metastore Thrift server's service principal."), + KERBEROS_PRINCIPAL("metastore.kerberos.principal", "hive.metastore.kerberos.principal", + "hive-metastore/[email protected]", + "The service principal for the metastore Thrift server. 
\n" + + "The special string _HOST will be replaced automatically with the correct host name."), + LIMIT_PARTITION_REQUEST("metastore.limit.partition.request", + "hive.metastore.limit.partition.request", -1, + "This limits the number of partitions (whole partition objects) that can be requested " + + "from the metastore for a give table. MetaStore API methods using this are: \n" + + "get_partitions, \n" + + "get_partitions_with_auth, \n" + + "get_partitions_by_filter, \n" + + "get_partitions_by_expr.\n" + + "The default value \"-1\" means no limit."), + LOG4J_FILE("metastore.log4j.file", "hive.log4j.file", "", + "Hive log4j configuration file.\n" + + "If the property is not set, then logging will be initialized using metastore-log4j2.properties found on the classpath.\n" + + "If the property is set, the value must be a valid URI (java.net.URI, e.g. \"file:///tmp/my-logging.xml\"), \n" + + "which you can then extract a URL from and pass to PropertyConfigurator.configure(URL)."), + MANAGER_FACTORY_CLASS("javax.jdo.PersistenceManagerFactoryClass", + "javax.jdo.PersistenceManagerFactoryClass", + "org.datanucleus.api.jdo.JDOPersistenceManagerFactory", + "class implementing the jdo persistence"), + MATERIALIZATIONS_INVALIDATION_CACHE_IMPL("metastore.materializations.invalidation.impl", + "hive.metastore.materializations.invalidation.impl", "DEFAULT", + new StringSetValidator("DEFAULT", "DISABLE"), + "The implementation that we should use for the materializations invalidation cache. 
\n" + + " DEFAULT: Default implementation for invalidation cache\n" + + " DISABLE: Disable invalidation cache (debugging purposes)"), + MATERIALIZATIONS_INVALIDATION_CACHE_CLEAN_FREQUENCY("metastore.materializations.invalidation.clean.frequency", + "hive.metastore.materializations.invalidation.clean.frequency", + 3600, TimeUnit.SECONDS, "Frequency at which timer task runs to remove unnecessary transaction entries from" + + "materializations invalidation cache."), + MATERIALIZATIONS_INVALIDATION_CACHE_EXPIRY_DURATION("metastore.materializations.invalidation.max.duration", + "hive.metastore.materializations.invalidation.max.duration", + 86400, TimeUnit.SECONDS, "Maximum duration for query producing a materialization. After this time, transaction" + + "entries that are not relevant for materializations can be removed from invalidation cache."), + + RUNTIME_STATS_CLEAN_FREQUENCY("runtime.stats.clean.frequency", "hive.metastore.runtime.stats.clean.frequency", 3600, + TimeUnit.SECONDS, "Frequency at which timer task runs to remove outdated runtime stat entries."), + RUNTIME_STATS_MAX_AGE("runtime.stats.max.age", "hive.metastore.runtime.stats.max.age", 86400 * 3, TimeUnit.SECONDS, + "Stat entries which are older than this are removed."), + + // Parameters for exporting metadata on table drop (requires the use of the) + // org.apache.hadoop.hive.ql.parse.MetaDataExportListener preevent listener + METADATA_EXPORT_LOCATION("metastore.metadata.export.location", "hive.metadata.export.location", + "", + "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" + + "it is the location to which the metadata will be exported. 
The default is an empty string, which results in the \n" + + "metadata being exported to the current user's home directory on HDFS."), + MOVE_EXPORTED_METADATA_TO_TRASH("metastore.metadata.move.exported.metadata.to.trash", + "hive.metadata.move.exported.metadata.to.trash", true, + "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" + + "this setting determines if the metadata that is exported will subsequently be moved to the user's trash directory \n" + + "alongside the dropped table data. This ensures that the metadata will be cleaned up along with the dropped table data."), + METRICS_ENABLED("metastore.metrics.enabled", "hive.metastore.metrics.enabled", false, + "Enable metrics on the metastore."), + METRICS_HADOOP2_COMPONENT_NAME("metastore.metrics.hadoop2.component", "hive.service.metrics.hadoop2.component", "hivemetastore", + "Component name to provide to Hadoop2 Metrics system."), + METRICS_JSON_FILE_INTERVAL("metastore.metrics.file.frequency", + "hive.service.metrics.file.frequency", 1, TimeUnit.MINUTES, + "For json metric reporter, the frequency of updating JSON metrics file."), + METRICS_JSON_FILE_LOCATION("metastore.metrics.file.location", + "hive.service.metrics.file.location", "/tmp/report.json", + "For metric class json metric reporter, the location of local JSON metrics file. " + + "This file will get overwritten at every interval."), + METRICS_REPORTERS("metastore.metrics.reporters", "metastore.metrics.reporters", "json,jmx", + new StringSetValidator("json", "jmx", "console", "hadoop"), + "A comma separated list of metrics reporters to start"), + MULTITHREADED("javax.jdo.option.Multithreaded", "javax.jdo.option.Multithreaded", true, + "Set this to true if multiple threads access metastore through JDO concurrently."), + MAX_OPEN_TXNS("metastore.max.open.txns", "hive.max.open.txns", 100000, + "Maximum number of open transactions. 
If \n" + + "current open transactions reach this limit, future open transaction requests will be \n" + + "rejected, until this number goes below the limit."), + NON_TRANSACTIONAL_READ("javax.jdo.option.NonTransactionalRead", + "javax.jdo.option.NonTransactionalRead", true, + "Reads outside of transactions"), + NOTIFICATION_SEQUENCE_LOCK_MAX_RETRIES("metastore.notification.sequence.lock.max.retries", + "hive.notification.sequence.lock.max.retries", 5, + "Number of retries required to acquire a lock when getting the next notification sequential ID for entries " + + "in the NOTIFICATION_LOG table."), + NOTIFICATION_SEQUENCE_LOCK_RETRY_SLEEP_INTERVAL( + "metastore.notification.sequence.lock.retry.sleep.interval", + "hive.notification.sequence.lock.retry.sleep.interval", 500, TimeUnit.MILLISECONDS, + "Sleep interval between retries to acquire a notification lock as described part of property " + + NOTIFICATION_SEQUENCE_LOCK_MAX_RETRIES.name()), + ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS("metastore.orm.retrieveMapNullsAsEmptyStrings", + "hive.metastore.orm.retrieveMapNullsAsEmptyStrings",false, + "Thrift does not support nulls in maps, so any nulls present in maps retrieved from ORM must " + + "either be pruned or converted to empty strings. Some backing dbs such as Oracle persist empty strings " + + "as nulls, so we should set this parameter if we wish to reverse that behaviour. For others, " + + "pruning is the correct behaviour"), + PARTITION_NAME_WHITELIST_PATTERN("metastore.partition.name.whitelist.pattern", + "hive.metastore.partition.name.whitelist.pattern", "", + "Partition names will be checked against this regex pattern and rejected if not matched."), + PART_INHERIT_TBL_PROPS("metastore.partition.inherit.table.properties", + "hive.metastore.partition.inherit.table.properties", "", + "List of comma separated keys occurring in table properties which will get inherited to newly created partitions. 
\n" + + "* implies all the keys will get inherited."), + PRE_EVENT_LISTENERS("metastore.pre.event.listeners", "hive.metastore.pre.event.listeners", "", + "List of comma separated listeners for metastore events."), + PWD("javax.jdo.option.ConnectionPassword", "javax.jdo.option.ConnectionPassword", "mine", + "password to use against metastore database"), + RAW_STORE_IMPL("metastore.rawstore.impl", "hive.metastore.rawstore.impl", + "org.apache.hadoop.hive.metastore.ObjectStore", + "Name of the class that implements org.apache.riven.rawstore interface. \n" + + "This class is used to store and retrieval of raw metadata objects such as table, database"), + REPLCMDIR("metastore.repl.cmrootdir", "hive.repl.cmrootdir", "/user/hive/cmroot/", + "Root dir for ChangeManager, used for deleted files."), + REPLCMRETIAN("metastore.repl.cm.retain", "hive.repl.cm.retain", 24, TimeUnit.HOURS, + "Time to retain removed files in cmrootdir."), + REPLCMINTERVAL("metastore.repl.cm.interval", "hive.repl.cm.interval", 3600, TimeUnit.SECONDS, + "Inteval for cmroot cleanup thread."), + REPLCMENABLED("metastore.repl.cm.enabled", "hive.repl.cm.enabled", false, + "Turn on ChangeManager, so delete files will go to cmrootdir."), + REPLDIR("metastore.repl.rootdir", "hive.repl.rootdir", "/user/hive/repl/", + "HDFS root dir for all replication dumps."), + REPL_COPYFILE_MAXNUMFILES("metastore.repl.copyfile.maxnumfiles", + "hive.exec.copyfile.maxnumfiles", 1L, + "Maximum number of files Hive uses to do sequential HDFS copies between directories." + + "Distributed copies (distcp) will be used instead for larger numbers of files so that copies can be done faster."), + REPL_COPYFILE_MAXSIZE("metastore.repl.copyfile.maxsize", + "hive.exec.copyfile.maxsize", 32L * 1024 * 1024 /*32M*/, + "Maximum file size (in bytes) that Hive uses to do single HDFS copies between directories." 
+ + "Distributed copies (distcp) will be used instead for bigger files so that copies can be done faster."), + SCHEMA_INFO_CLASS("metastore.schema.info.class", "hive.metastore.schema.info.class", + "org.apache.hadoop.hive.metastore.MetaStoreSchemaInfo", + "Fully qualified class name for the metastore schema information class \n" + + "which is used by schematool to fetch the schema information.\n" + + " This class should implement the IMetaStoreSchemaInfo interface"), + SCHEMA_VERIFICATION("metastore.schema.verification", "hive.metastore.schema.verification", true, + "Enforce metastore schema version consistency.\n" + + "True: Verify that version information stored in is compatible with one from Hive jars. Also disable automatic\n" + + " schema migration attempt. Users are required to manually migrate schema after Hive upgrade which ensures\n" + + " proper metastore schema migration. (Default)\n" + + "False: Warn if the version information stored in metastore doesn't match with one from in Hive jars."), + SCHEMA_VERIFICATION_RECORD_VERSION("metastore.schema.verification.record.version", + "hive.metastore.schema.verification.record.version", false, + "When true the current MS version is recorded in the VERSION table. If this is disabled and verification is\n" + + " enabled the MS will be unusable."), + SERDES_USING_METASTORE_FOR_SCHEMA("metastore.serdes.using.metastore.for.schema", + "hive.serdes.using.metastore.for.schema", + "org.apache.hadoop.hive.ql.io.orc.OrcSerde," + + "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," + + "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe," + + "org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe," + + "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe," + + "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," + + "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe," + + "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe", + "SerDes retrieving schema from metastore. 
This is an internal parameter."), + SERVER_MAX_MESSAGE_SIZE("metastore.server.max.message.size", + "hive.metastore.server.max.message.size", 100*1024*1024L, + "Maximum message size in bytes a HMS will accept."), + SERVER_MAX_THREADS("metastore.server.max.threads", + "hive.metastore.server.max.threads", 1000, + "Maximum number of worker threads in the Thrift server's pool."), + SERVER_MIN_THREADS("metastore.server.min.threads", "hive.metastore.server.min.threads", 200, + "Minimum number of worker threads in the Thrift server's pool."), + SERVER_PORT("metastore.thrift.port", "hive.metastore.port", 9083, + "Hive metastore listener port"), + SSL_KEYSTORE_PASSWORD("metastore.keystore.password", "hive.metastore.keystore.password", "", + "Metastore SSL certificate keystore password."), + SSL_KEYSTORE_PATH("metastore.keystore.path", "hive.metastore.keystore.path", "", + "Metastore SSL certificate keystore location."), + SSL_PROTOCOL_BLACKLIST("metastore.ssl.protocol.blacklist", "hive.ssl.protocol.blacklist", + "SSLv2,SSLv3", "SSL Versions to disable for all Hive Servers"), + SSL_TRUSTSTORE_PATH("metastore.truststore.path", "hive.metastore.truststore.path", "", + "Metastore SSL certificate truststore location."), + SSL_TRUSTSTORE_PASSWORD("metastore.truststore.password", "hive.metastore.truststore.password", "", + "Metastore SSL certificate truststore password."), + STATS_AUTO_GATHER("metastore.stats.autogather", "hive.stats.autogather", true, + "A flag to gather statistics (only basic) automatically during the INSERT OVERWRITE command."), + STATS_FETCH_BITVECTOR("metastore.stats.fetch.bitvector", "hive.stats.fetch.bitvector", false, + "Whether we fetch bitvector when we compute ndv. Users can turn it off if they want to use old schema"), + STATS_NDV_TUNER("metastore.stats.ndv.tuner", "hive.metastore.stats.ndv.tuner", 0.0, + "Provides a tunable parameter between the lower bound and the higher bound of ndv for aggregate ndv across all the partitions. 
\n" + + "The lower bound is equal to the maximum of ndv of all the partitions. The higher bound is equal to the sum of ndv of all the partitions.\n" + + "Its value should be between 0.0 (i.e., choose lower bound) and 1.0 (i.e., choose higher bound)"), + STATS_NDV_DENSITY_FUNCTION("metastore.stats.ndv.densityfunction", + "hive.metastore.stats.ndv.densityfunction", false, + "Whether to use density function to estimate the NDV for the whole table based on the NDV of partitions"), + STATS_DEFAULT_AGGREGATOR("metastore.stats.default.aggregator", "hive.stats.default.aggregator", + "", + "The Java class (implementing the StatsAggregator interface) that is used by default if hive.stats.dbclass is custom type."), + STATS_DEFAULT_PUBLISHER("metastore.stats.default.publisher", "hive.stats.default.publisher", "", + "The Java class (implementing the StatsPublisher interface) that is used by default if hive.stats.dbclass is custom type."), + STATS_AUTO_UPDATE("metastore.stats.auto.analyze", "hive.metastore.stats.auto.analyze", "none", + new EnumValidator(StatsUpdateMode.values()), + "Whether to update stats in the background; none - no, all - for all tables, existing - only existing, out of date, stats."), + STATS_AUTO_UPDATE_NOOP_WAIT("metastore.stats.auto.analyze.noop.wait", + "hive.metastore.stats.auto.analyze.noop.wait", 5L, TimeUnit.MINUTES, + new TimeValidator(TimeUnit.MINUTES), + "How long to sleep if there were no stats needing update during an update iteration.\n" + + "This is a setting to throttle table/partition checks when nothing is being changed; not\n" + + "the analyze queries themselves."), + STATS_AUTO_UPDATE_WORKER_COUNT("metastore.stats.auto.analyze.worker.count", + "hive.metastore.stats.auto.analyze.worker.count", 1, + "Number of parallel analyze commands to run for background stats update."), + STORAGE_SCHEMA_READER_IMPL("metastore.storage.schema.reader.impl", "metastore.storage.schema.reader.impl", + DefaultStorageSchemaReader.class.getName(), + "The class 
to use to read schemas from storage. It must implement " + + "org.apache.hadoop.hive.metastore.StorageSchemaReader"), + STORE_MANAGER_TYPE("datanucleus.storeManagerType", "datanucleus.storeManagerType", "rdbms", "metadata store type"), + STRICT_MANAGED_TABLES("metastore.strict.managed.tables", "hive.strict.managed.tables", false, + "Whether strict managed tables mode is enabled. With this mode enabled, " + + "only transactional tables (both full and insert-only) are allowed to be created as managed tables"), + SUPPORT_SPECICAL_CHARACTERS_IN_TABLE_NAMES("metastore.support.special.characters.tablename", + "hive.support.special.characters.tablename", true, + "This flag should be set to true to enable support for special characters in table names.\n" + + "When it is set to false, only [a-zA-Z_0-9]+ are supported.\n" + + "The only supported special character right now is '/'. This flag applies only to quoted table names.\n" + + "The default value is true."), + TASK_THREADS_ALWAYS("metastore.task.threads.always", "metastore.task.threads.always", + EventCleanerTask.class.getName() + "," + RuntimeStatsCleanerTask.class.getName() + "," + + "org.apache.hadoop.hive.metastore.repl.DumpDirCleanerTask" + "," + + "org.apache.hadoop.hive.metastore.HiveProtoEventsCleanerTask", + "Comma separated list of tasks that will be started in separate threads. These will " + + "always be started, regardless of whether the metastore is running in embedded mode " + + "or in server mode. They must implement " + MetastoreTaskThread.class.getName()), + TASK_THREADS_REMOTE_ONLY("metastore.task.threads.remote", "metastore.task.threads.remote", + AcidHouseKeeperService.class.getName() + "," + + AcidOpenTxnsCounterService.class.getName() + "," + + AcidCompactionHistoryService.class.getName() + "," + + AcidWriteSetService.class.getName() + "," + + MaterializationsRebuildLockCleanerTask.class.getName(), + "Command separated list of tasks that will be started in separate threads. 
These will be" + + " started only when the metastore is running as a separate service. They must " + + "implement " + MetastoreTaskThread.class.getName()), + TCP_KEEP_ALIVE("metastore.server.tcp.keepalive", + "hive.metastore.server.tcp.keepalive", true, + "Whether to enable TCP keepalive for the metastore server. Keepalive will prevent accumulation of half-open connections."), + THREAD_POOL_SIZE("metastore.thread.pool.size", "no.such", 10, + "Number of threads in the thread pool. These will be used to execute all background " + + "processes."), + THRIFT_CONNECTION_RETRIES("metastore.connect.retries", "hive.metastore.connect.retries", 3, + "Number of retries while opening a connection to metastore"), + THRIFT_FAILURE_RETRIES("metastore.failure.retries", "hive.metastore.failure.retries", 1, + "Number of retries upon failure of Thrift metastore calls"), + THRIFT_URIS("metastore.thrift.uris", "hive.metastore.uris", "", + "Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore."), + THRIFT_URI_SELECTION("metastore.thrift.uri.selection", "hive.metastore.uri.selection", "RANDOM", + new StringSetValidator("RANDOM", "SEQUENTIAL"), + "Determines the selection mechanism used by metastore client to connect to remote " + + "metastore. SEQUENTIAL implies that the first valid metastore from the URIs specified " + + "as part of hive.metastore.uris will be picked. 
RANDOM implies that the metastore " + + "will be picked randomly"), + TIMEDOUT_TXN_REAPER_START("metastore.timedout.txn.reaper.start", + "hive.timedout.txn.reaper.start", 100, TimeUnit.SECONDS, + "Time delay of 1st reaper run after metastore start"), + TIMEDOUT_TXN_REAPER_INTERVAL("metastore.timedout.txn.reaper.interval", + "hive.timedout.txn.reaper.interval", 180, TimeUnit.SECONDS, + "Time interval describing how often the reaper runs"), + TOKEN_SIGNATURE("metastore.token.signature", "hive.metastore.token.signature", "", + "The delegation token service name to match when selecting a token from the current user's tokens."), + TRANSACTIONAL_EVENT_LISTENERS("metastore.transactional.event.listeners", + "hive.metastore.transactional.event.listeners", "", + "A comma separated list of Java classes that implement the org.apache.riven.MetaStoreEventListener" + + " interface. Both the metastore event and corresponding listener method will be invoked in the same JDO transaction."), + TRY_DIRECT_SQL("metastore.try.direct.sql", "hive.metastore.try.direct.sql", true, + "Whether the metastore should try to use direct SQL queries instead of the\n" + + "DataNucleus for certain read paths. This can improve metastore performance when\n" + + "fetching many partitions or column statistics by orders of magnitude; however, it\n" + + "is not guaranteed to work on all RDBMS-es and all versions. In case of SQL failures,\n" + + "the metastore will fall back to the DataNucleus, so it's safe even if SQL doesn't\n" + + "work for all queries on your datastore. If all SQL queries fail (for example, your\n" + + "metastore is backed by MongoDB), you might want to disable this to save the\n" + + "try-and-fall-back cost."), + TRY_DIRECT_SQL_DDL("metastore.try.direct.sql.ddl", "hive.metastore.try.direct.sql.ddl", true, + "Same as hive.metastore.try.direct.sql, for read statements within a transaction that\n" + + "modifies metastore data. 
Due to non-standard behavior in Postgres, if a direct SQL\n" + + "select query has incorrect syntax or something similar inside a transaction, the\n" + + "entire transaction will fail and fall-back to DataNucleus will not be possible. You\n" + + "should disable the usage of direct SQL inside transactions if that happens in your case."), + TXN_MAX_OPEN_BATCH("metastore.txn.max.open.batch", "hive.txn.max.open.batch", 1000, + "Maximum number of transactions that can be fetched in one call to open_txns().\n" + + "This controls how many transactions streaming agents such as Flume or Storm open\n" + + "simultaneously. The streaming agent then writes that number of entries into a single\n" + + "file (per Flume agent or Storm bolt). Thus increasing this value decreases the number\n" + + "of delta files created by streaming agents. But it also increases the number of open\n" + + "transactions that Hive has to track at any given time, which may negatively affect\n" + + "read performance."), + TXN_RETRYABLE_SQLEX_REGEX("metastore.txn.retryable.sqlex.regex", + "hive.txn.retryable.sqlex.regex", "", "Comma separated list\n" + + "of regular expression patterns for SQL state, error code, and error message of\n" + + "retryable SQLExceptions, that's suitable for the metastore DB.\n" + + "For example: Can't serialize.*,40001$,^Deadlock,.*ORA-08176.*\n" + + "The string that the regex will be matched against is of the following form, where ex is a SQLException:\n" + + "ex.getMessage() + \" (SQLState=\" + ex.getSQLState() + \", ErrorCode=\" + ex.getErrorCode() + \")\""), + TXN_STORE_IMPL("metastore.txn.store.impl", "hive.metastore.txn.store.impl", + "org.apache.hadoop.hive.metastore.txn.CompactionTxnHandler", + "Name of class that implements org.apache.riven.txn.TxnStore. 
This " + + "class is used to store and retrieve transactions and locks"), + TXN_TIMEOUT("metastore.txn.timeout", "hive.txn.timeout", 300, TimeUnit.SECONDS, + "time after which transactions are declared aborted if the client has not sent a heartbeat."), + URI_RESOLVER("metastore.uri.resolver", "hive.metastore.uri.resolver", "", + "If set, fully qualified class name of resolver for hive metastore uri's"), + USERS_IN_ADMIN_ROLE("metastore.users.in.admin.role", "hive.users.in.admin.role", "", false, + "Comma separated list of users who are in admin role for bootstrapping.\n" + + "More users can be added in ADMIN role later."), + USE_SSL("metastore.use.SSL", "hive.metastore.use.SSL", false, + "Set this to true for using SSL encryption in HMS server."), + USE_THRIFT_SASL("metastore.sasl.enabled", "hive.metastore.sasl.enabled", false, + "If true, the metastore Thrift interface will be secured with SASL. Clients must authenticate with Kerberos."), + USE_THRIFT_FRAMED_TRANSPORT("metastore.thrift.framed.transport.enabled", + "hive.metastore.thrift.framed.transport.enabled", false, + "If true, the metastore Thrift interface will use TFramedTransport. When false (default) a standard TTransport is used."), + USE_THRIFT_COMPACT_PROTOCOL("metastore.thrift.compact.protocol.enabled", + "hive.metastore.thrift.compact.protocol.enabled", false, + "If true, the metastore Thrift interface will use TCompactProtocol. When false (default) TBinaryProtocol will be used.\n" + + "Setting it to true will break compatibility with older clients running TBinaryProtocol."), + VALIDATE_COLUMNS("datanucleus.schema.validateColumns", "datanucleus.schema.validateColumns", false, + "validates existing schema against code. turn this on if you want to verify existing schema"), + VALIDATE_CONSTRAINTS("datanucleus.schema.validateConstraints", + "datanucleus.schema.validateConstraints", false, + "validates existing schema against code. 
turn this on if you want to verify existing schema"), + VALIDATE_TABLES("datanucleus.schema.validateTables", + "datanucleus.schema.validateTables", false, + "validates existing schema against code. turn this on if you want to verify existing schema"), + WAREHOUSE("metastore.warehouse.dir", "hive.metastore.warehouse.dir", "/user/hive/warehouse", + "location of default database for the warehouse"), + WAREHOUSE_EXTERNAL("metastore.warehouse.external.dir", + "hive.metastore.warehouse.external.dir", "", + "Default location for external tables created in the warehouse. " + + "If not set or null, then the normal warehouse location will be used as the default location."), + WRITE_SET_REAPER_INTERVAL("metastore.writeset.reaper.interval", + "hive.writeset.reaper.interval", 60, TimeUnit.SECONDS, + "Frequency of WriteSet reaper runs"), + WM_DEFAULT_POOL_SIZE("metastore.wm.default.pool.size", + "hive.metastore.wm.default.pool.size", 4, + "The size of a default pool to create when creating an empty resource plan;\n" + + "If not positive, no default pool will be created."), + RAWSTORE_PARTITION_BATCH_SIZE("metastore.rawstore.batch.size", + "metastore.rawstore.batch.size", -1, + "Batch size for partition and other object retrieval from the underlying DB in JDO.\n" + + "The JDO implementation such as DataNucleus may run into issues when the generated queries are\n" + + "too large. Use this parameter to break the query into multiple batches. -1 means no batching."), + + // Hive values we have copied and use as is + // These two are used to indicate that we are running tests + HIVE_IN_TEST("hive.in.test", "hive.in.test", false, "internal usage only, true in test mode"), + HIVE_IN_TEZ_TEST("hive.in.tez.test", "hive.in.tez.test", false, + "internal use only, true when in testing tez"), + // We need to track this as some listeners pass it through our config and we need to honor + // the system properties. 
+ HIVE_AUTHORIZATION_MANAGER("hive.security.authorization.manager", + "hive.security.authorization.manager", + "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory", + "The Hive client authorization manager class name. The user defined authorization class should implement \n" + + "interface org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider."), + HIVE_METASTORE_AUTHENTICATOR_MANAGER("hive.security.metastore.authenticator.manager", + "hive.security.metastore.authenticator.manager", + "org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator", + "authenticator manager class name to be used in the metastore for authentication. \n" + + "The user defined authenticator should implement interface org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider."), + HIVE_METASTORE_AUTHORIZATION_AUTH_READS("hive.security.metastore.authorization.auth.reads", + "hive.security.metastore.authorization.auth.reads", true, + "If this is true, metastore authorizer authorizes read actions on database, table"), + // The metastore shouldn't care what txn manager Hive is running, but in various tests it + // needs to set these values. We should do the work to detangle this. + HIVE_TXN_MANAGER("hive.txn.manager", "hive.txn.manager", + "org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager", + "Set to org.apache.hadoop.hive.ql.lockmgr.DbTxnManager as part of turning on Hive\n" + + "transactions, which also requires appropriate settings for hive.compactor.initiator.on,\n" + + "hive.compactor.worker.threads, hive.support.concurrency (true),\n" + + "and hive.exec.dynamic.partition.mode (nonstrict).\n" + + "The default DummyTxnManager replicates pre-Hive-0.13 behavior and provides\n" + + "no transactions."), + // Metastore always support concurrency, but certain ACID tests depend on this being set. 
We + // need to do the work to detangle this + HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", "hive.support.concurrency", false, + "Whether Hive supports concurrency control or not. \n" + + "A ZooKeeper instance must be up and running when using zookeeper Hive lock manager "), ++ HIVE_TXN_STATS_ENABLED("hive.txn.stats.enabled", "hive.txn.stats.enabled", true, ++ "Whether Hive supports transactional stats (accurate stats for transactional tables)"), + + // Deprecated Hive values that we are keeping for backwards compatibility. + @Deprecated + HIVE_CODAHALE_METRICS_REPORTER_CLASSES("hive.service.metrics.codahale.reporter.classes", + "hive.service.metrics.codahale.reporter.classes", "", + "Use METRICS_REPORTERS instead. Comma separated list of reporter implementation classes " + + "for metric class org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics. Overrides " + + "HIVE_METRICS_REPORTER conf if present. This will be overridden by " + + "METRICS_REPORTERS if it is present"), + @Deprecated + HIVE_METRICS_REPORTER("hive.service.metrics.reporter", "hive.service.metrics.reporter", "", + "Reporter implementations for metric class " + + "org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics;" + + "Deprecated, use METRICS_REPORTERS instead. This configuraiton will be" + + " overridden by HIVE_CODAHALE_METRICS_REPORTER_CLASSES and METRICS_REPORTERS if " + + "present. 
Comma separated list of JMX, CONSOLE, JSON_FILE, HADOOP2"), + + // These are all values that we put here just for testing + STR_TEST_ENTRY("test.str", "hive.test.str", "defaultval", "comment"), + STR_SET_ENTRY("test.str.set", "hive.test.str.set", "a", new StringSetValidator("a", "b", "c"), ""), + STR_LIST_ENTRY("test.str.list", "hive.test.str.list", "a,b,c", + "no comment"), + LONG_TEST_ENTRY("test.long", "hive.test.long", 42, "comment"), + DOUBLE_TEST_ENTRY("test.double", "hive.test.double", 3.141592654, "comment"), + TIME_TEST_ENTRY("test.time", "hive.test.time", 1, TimeUnit.SECONDS, "comment"), + TIME_VALIDATOR_ENTRY_INCLUSIVE("test.time.validator.inclusive", "hive.test.time.validator.inclusive", 1, + TimeUnit.SECONDS, + new TimeValidator(TimeUnit.MILLISECONDS, 500L, true, 1500L, true), "comment"), + TIME_VALIDATOR_ENTRY_EXCLUSIVE("test.time.validator.exclusive", "hive.test.time.validator.exclusive", 1, + TimeUnit.SECONDS, + new TimeValidator(TimeUnit.MILLISECONDS, 500L, false, 1500L, false), "comment"), + BOOLEAN_TEST_ENTRY("test.bool", "hive.test.bool", true, "comment"), + CLASS_TEST_ENTRY("test.class", "hive.test.class", "", "comment"); + + /* Per-variable state; every field is final and is assigned exactly once by whichever constructor overload the constant selects. In the constants above, the first string argument becomes varname and the second becomes hiveName. defaultVal is typed Object because the overloads store String, boxed long/boolean/double, or TimeValue defaults. */ private final String varname; + private final String hiveName; + private final Object defaultVal; + private final Validator validator; + private final boolean caseSensitive; + private final String description; + + /** Overload for a String default: validator is set to null and caseSensitive to false. */ ConfVars(String varname, String hiveName, String defaultVal, String description) { + this.varname = varname; + this.hiveName = hiveName; + this.defaultVal = defaultVal; + validator = null; + caseSensitive = false; + this.description = description; + } + + /** Overload for a String default that also stores the supplied validator; caseSensitive is false. */ ConfVars(String varname, String hiveName, String defaultVal, Validator validator, + String description) { + this.varname = varname; + this.hiveName = hiveName; + this.defaultVal = defaultVal; + this.validator = validator; + caseSensitive = false; + this.description = description; + } + + /** Overload for a String default with an explicit caseSensitive flag; validator is set to null. This is the only overload that can set caseSensitive to true. */ ConfVars(String varname, String hiveName, String defaultVal, boolean
caseSensitive, + String description) { + this.varname = varname; + this.hiveName = hiveName; + this.defaultVal = defaultVal; + validator = null; + this.caseSensitive = caseSensitive; + this.description = description; + } + + /** Overload for a long default (boxed on assignment to the Object field); validator is null and caseSensitive is false. */ ConfVars(String varname, String hiveName, long defaultVal, String description) { + this.varname = varname; + this.hiveName = hiveName; + this.defaultVal = defaultVal; + validator = null; + caseSensitive = false; + this.description = description; + } + + /** Overload for a long default that also stores the supplied validator; caseSensitive is false. */ ConfVars(String varname, String hiveName, long defaultVal, Validator validator, + String description) { + this.varname = varname; + this.hiveName = hiveName; + this.defaultVal = defaultVal; + this.validator = validator; + caseSensitive = false; + this.description = description; + } + + /** Overload for a boolean default (boxed on assignment); validator is null and caseSensitive is false. */ ConfVars(String varname, String hiveName, boolean defaultVal, String description) { + this.varname = varname; + this.hiveName = hiveName; + this.defaultVal = defaultVal; + validator = null; + caseSensitive = false; + this.description = description; + } + + /** Overload for a double default (boxed on assignment); validator is null and caseSensitive is false. */ ConfVars(String varname, String hiveName, double defaultVal, String description) { + this.varname = varname; + this.hiveName = hiveName; + this.defaultVal = defaultVal; + validator = null; + caseSensitive = false; + this.description = description; + } + + /** Overload for a time default: the long and its unit are wrapped in a new TimeValue, and a new TimeValidator built from the same unit is installed as the validator; caseSensitive is false. */ ConfVars(String varname, String hiveName, long defaultVal, TimeUnit unit, String description) { + this.varname = varname; + this.hiveName = hiveName; + this.defaultVal = new TimeValue(defaultVal, unit); + validator = new TimeValidator(unit); + caseSensitive = false; + this.description = description; + } + + /** Overload for a time default with a caller-supplied validator: the default is still wrapped in a new TimeValue, but the given validator is stored instead of a TimeValidator built from unit; caseSensitive is false. */ ConfVars(String varname, String hiveName, long defaultVal, TimeUnit unit, + Validator validator, String description) { + this.varname = varname; + this.hiveName = hiveName; + this.defaultVal = new TimeValue(defaultVal, unit); + this.validator = validator; + caseSensitive = false; + this.description = description; + } + + public void validate(String value) throws IllegalArgumentException { + if (validator !=
null) { + validator.validate(value); + } + } + + /** + * @return the caseSensitive flag given at construction; every constructor except the one + * taking an explicit boolean caseSensitive argument sets it to false + */ + public boolean isCaseSensitive() { + return caseSensitive; + } + + /** + * If you are calling this, you're probably doing it wrong. You shouldn't need to use the + * underlying variable name. Use one of the getVar methods instead. Only use this if you + * are 100% sure you know what you're doing. The reason for this is that MetastoreConf goes to a + * lot of trouble to make sure it checks both Hive and Metastore values for config keys. If + * you call {@link Configuration#get(String)} you are undermining that. + * @return variable name + */ + public String getVarname() { + return varname; + } + + /** + * Use this method if you need to set a system property and are going to instantiate the + * configuration file via HiveConf. This is because HiveConf only looks for values it knows, + * so it will miss all of the metastore.* ones. Do not use this to explicitly set or get the + * underlying config value unless you are 100% sure you know what you're doing. + * The reason for this is that MetastoreConf goes to a + * lot of trouble to make sure it checks both Hive and Metastore values for config keys. If + * you call {@link Configuration#get(String)} you are undermining that. + * @return hive.* configuration key + */ + public String getHiveName() { + return hiveName; + } + + /** + * @return default value as passed to the constructor: a String, Long, Boolean or Double, or a + * TimeValue for variables declared with a TimeUnit + */ + public Object getDefaultVal() { + return defaultVal; + } + + /** + * @return the description text given when this variable was declared + */ + public String getDescription() { + return description; + } + + /** + * This is useful if you need the variable name for a LOG message or + * {@link System#setProperty(String, String)}, beware however that you should only use this + * with setProperty if you're going to create a configuration via + * {@link MetastoreConf#newMetastoreConf()}. If you are going to create it with HiveConf, + * then use {@link #getHiveName()}. 
+ * @return metastore.* configuration key + */ + @Override + public String toString() { + return varname; + } + } + + public static final ConfVars[] dataNucleusAndJdoConfs = { + ConfVars.AUTO_CREATE_ALL, + ConfVars.CONNECTION_DRIVER, + ConfVars.CONNECTION_POOLING_MAX_CONNECTIONS, + ConfVars.CONNECTION_POOLING_TYPE, + ConfVars.CONNECT_URL_KEY, + ConfVars.CONNECTION_USER_NAME, + ConfVars.DATANUCLEUS_AUTOSTART, + ConfVars.DATANUCLEUS_CACHE_LEVEL2, + ConfVars.DATANUCLEUS_CACHE_LEVEL2_TYPE, + ConfVars.DATANUCLEUS_INIT_COL_INFO, + ConfVars.DATANUCLEUS_PLUGIN_REGISTRY_BUNDLE_CHECK, + ConfVars.DATANUCLEUS_TRANSACTION_ISOLATION, + ConfVars.DATANUCLEUS_USE_LEGACY_VALUE_STRATEGY, + ConfVars.DETACH_ALL_ON_COMMIT, + ConfVars.IDENTIFIER_FACTORY, + ConfVars.MANAGER_FACTORY_CLASS, + ConfVars.MULTITHREADED, + ConfVars.NON_TRANSACTIONAL_READ, + ConfVars.PWD, + ConfVars.STORE_MANAGER_TYPE, + ConfVars.VALIDATE_COLUMNS, + ConfVars.VALIDATE_CONSTRAINTS, + ConfVars.VALIDATE_TABLES + }; + + // Make sure no one calls this + private MetastoreConf() { + throw new RuntimeException("You should never be creating one of these!"); + } + + public static void setHiveSiteLocation(URL location) { + hiveSiteURL = location; + } + + public static Configuration newMetastoreConf() { + return newMetastoreConf(new Configuration()); + } + + public static Configuration newMetastoreConf(Configuration conf) { + + ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); + if (classLoader == null) { + classLoader = MetastoreConf.class.getClassLoader(); + } + // We don't add this to the resources because we don't want to read config values from it. + // But we do find it because we want to remember where it is for later in case anyone calls + // getHiveDefaultLocation(). + hiveDefaultURL = classLoader.getResource("hive-default.xml"); + + // Add in hive-site.xml. We add this first so that it gets overridden by the new metastore + // specific files if they exist. 
+ if(hiveSiteURL == null) { + /* + * this 'if' is pretty lame - QTestUtil.QTestUtil() uses hiveSiteURL to load a specific + * hive-site.xml from data/conf/<subdir> so this makes it follow the same logic - otherwise + * HiveConf and MetastoreConf may load different hive-site.xml ( For example, + * HiveConf uses data/conf/spark/hive-site.xml and MetastoreConf data/conf/hive-site.xml) + */ + hiveSiteURL = findConfigFile(classLoader, "hive-site.xml"); + } + if (hiveSiteURL != null) { + conf.addResource(hiveSiteURL); + } + + // Now add hivemetastore-site.xml. Again we add this before our own config files so that the + // newer overrides the older. + hiveMetastoreSiteURL = findConfigFile(classLoader, "hivemetastore-site.xml"); + if (hiveMetastoreSiteURL != null) { + conf.addResource(hiveMetastoreSiteURL); + } + + // Add in our conf file + metastoreSiteURL = findConfigFile(classLoader, "metastore-site.xml"); + if (metastoreSiteURL != null) { + conf.addResource(metastoreSiteURL); + } + + // If a system property that matches one of our conf value names is set then use the value + // it's set to to set our own conf value. + for (ConfVars var : ConfVars.values()) { + if (System.getProperty(var.varname) != null) { + LOG.debug("Setting conf value " + var.varname + " using value " + + System.getProperty(var.varname)); + conf.set(var.varname, System.getProperty(var.varname)); + } + } + + // Pick up any system properties that start with "hive." and set them in our config. This + // way we can properly pull any Hive values from the environment without needing to know all + // of the Hive config values. 
+ System.getProperties().stringPropertyNames().stream() + .filter(s -> s.startsWith("hive.")) + .forEach(s -> { + String v = System.getProperty(s); + LOG.debug("Picking up system property " + s + " with value " + v); + conf.set(s, v); + }); + + // If we are going to validate the schema, make sure we don't create it + if (getBoolVar(conf, ConfVars.SCHEMA_VERIFICATION)) { + setBoolVar(conf, ConfVars.AUTO_CREATE_ALL, false); + } + + if (!beenDumped.getAndSet(true) && getBoolVar(conf, ConfVars.DUMP_CONFIG_ON_CREATION) && + LOG.isDebugEnabled()) { + LOG.debug(dumpConfig(conf)); + } + return conf; + } + + /** + * Locate a configuration file. The classpath is searched first, then the directories named by + * METASTORE_CONF_DIR, METASTORE_HOME (conf subdirectory), HIVE_CONF_DIR and HIVE_HOME (conf + * subdirectory), and finally a lookup keyed on the directory holding this class's jar. + * @param classLoader class loader used for the classpath lookup + * @param name name of the configuration file to find + * @return URL of the first match, or null if the file was not found anywhere + */ + private static URL findConfigFile(ClassLoader classLoader, String name) { + // First, look in the classpath + URL result = classLoader.getResource(name); + if (result == null) { + // Nope, so look to see if our conf dir has been explicitly set + result = seeIfConfAtThisLocation("METASTORE_CONF_DIR", name, false); + if (result == null) { + // Nope, so look to see if our home dir has been explicitly set + result = seeIfConfAtThisLocation("METASTORE_HOME", name, true); + if (result == null) { + // Nope, so look to see if Hive's conf dir has been explicitly set + result = seeIfConfAtThisLocation("HIVE_CONF_DIR", name, false); + if (result == null) { + // Nope, so look to see if Hive's home dir has been explicitly set + result = seeIfConfAtThisLocation("HIVE_HOME", name, true); + if (result == null) { + // Nope, so look to see if we can find a conf file by finding our jar, going up one + // directory, and looking for a conf directory. 
+ // NOTE(review): the jar directory is handed to seeIfConfAtThisLocation as its envVar + // argument, so it is looked up as an environment variable / system property name + // rather than used as a directory -- confirm whether that is intended. + String jarDir = null; + try { + URI jarUri = MetastoreConf.class.getProtectionDomain().getCodeSource().getLocation().toURI(); + jarDir = new File(jarUri).getParent(); + } catch (Throwable e) { + LOG.warn("Cannot get jar URI", e); + } + // Skip this last lookup when the jar directory is unknown rather than failing on a null + if (jarDir != null) { + result = seeIfConfAtThisLocation(jarDir, name, true); + } + // At this point if we haven't found it, screw it, we don't know where it is + if (result == null) { + LOG.info("Unable to find config file " + name); + } + } + } + } + } + } + // Only report a hit when there is one; a miss has already been logged above + if (result != null) { + LOG.info("Found configuration file " + result); + } + return result; + } + + /** + * Resolve a directory from the environment variable envVar, falling back to the system + * property TEST_ENV_WORKAROUND + envVar, and check whether the config file exists there. + * @param envVar name of the environment variable holding the directory + * @param name name of the configuration file + * @param inConfDir if true look in the "conf" subdirectory of the resolved directory, + * otherwise directly in it + * @return URL of the file, or null if the directory is not set or the file is not there + */ + private static URL seeIfConfAtThisLocation(String envVar, String name, boolean inConfDir) { + String path = System.getenv(envVar); + if (path == null) { + // Workaround for testing since tests can't set the env vars. + path = System.getProperty(TEST_ENV_WORKAROUND + envVar); + } + if (path != null) { + String suffix = inConfDir ? "conf" + File.separatorChar + name : name; + return checkConfigFile(new File(path, suffix)); + } + return null; + } + + /** + * @param f candidate configuration file + * @return URL of f if it exists and is a regular file; null otherwise, including when the + * check itself throws (a warning is logged in that case) + */ + private static URL checkConfigFile(File f) { + try { + return (f.exists() && f.isFile()) ? f.toURI().toURL() : null; + } catch (Throwable e) { + LOG.warn("Error looking for config " + f, e); + return null; + } + } + + // In all of the getters, we try the metastore value name first. If it is not set we try the + // Hive value name. + + /** + * Get the variable as a string + * @param conf configuration to retrieve it from + * @param var variable to retrieve + * @return value, or default value if value not in config file + */ + public static String getVar(Configuration conf, ConfVars var) { + assert var.defaultVal.getClass() == String.class; + String val = conf.get(var.varname); + return val == null ? 
conf.get(var.hiveName, (String)var.defaultVal) : val; + } + + /** + * Get the variable as a string + * @param conf configuration to retrieve it from + * @param var variable to retrieve + * @param defaultVal default to return if the variable is unset + * @return value, or default value passed in if the value is not in the config file + */ + public static String getVar(Configuration conf, ConfVars var, String defaultVal) { + assert var.defaultVal.getClass() == String.class; + String val = conf.get(var.varname); + return val == null ? conf.get(var.hiveName, defaultVal) : val; + } + + /** + * Treat a configuration value as a comma separated list. + * @param conf configuration to retrieve it from + * @param var variable to retrieve + * @return collection of strings. If the value is unset it will return an empty collection. + */ + public static Collection<String> getStringCollection(Configuration conf, ConfVars var) { + assert var.defaultVal.getClass() == String.class; + String val = conf.get(var.varname); + if (val == null) { + val = conf.get(var.hiveName, (String)var.defaultVal); + } + if (val == null) { + return Collections.emptySet(); + } + return StringUtils.asSet(val.split(",")); + } + + /** + * Set the variable as a string + * @param conf configuration file to set it in + * @param var variable to set + * @param val value to set it to + */ + public static void setVar(Configuration conf, ConfVars var, String val) { + assert var.defaultVal.getClass() == String.class; + conf.set(var.varname, val); + } + + /** + * Get the variable as an int. Note that all integer valued variables are stored as longs, thus + * this downcasts from a long to an int. Values outside the int range trip an assertion when + * assertions are enabled; otherwise the cast silently truncates them. 
+ * @param conf configuration to retrieve it from + * @param var variable to retrieve + * @return value, or default value if value not in config file + */ + public static int getIntVar(Configuration conf, ConfVars var) { + long val = getLongVar(conf, var); + // Guard both ends of the int range, not just the upper bound + assert val >= Integer.MIN_VALUE && val <= Integer.MAX_VALUE; + return (int)val; + } + + /** + * Get the variable as a long + * @param conf configuration to retrieve it from + * @param var variable to retrieve + * @return value, or default value if value not in config file + */ + public static long getLongVar(Configuration conf, ConfVars var) { + assert var.defaultVal.getClass() == Long.class; + String val = conf.get(var.varname); + // parseLong yields the primitive directly, avoiding a box/unbox round trip + return val == null ? conf.getLong(var.hiveName, (Long)var.defaultVal) : Long.parseLong(val); + } + + /** + * Set the variable as a long + * @param conf configuration file to set it in + * @param var variable to set + * @param val value to set it to + */ + public static void setLongVar(Configuration conf, ConfVars var, long val) { + assert var.defaultVal.getClass() == Long.class; + conf.setLong(var.varname, val); + } + + /** + * Get the variable as a boolean + * @param conf configuration to retrieve it from + * @param var variable to retrieve + * @return value, or default value if value not in config file + */ + public static boolean getBoolVar(Configuration conf, ConfVars var) { + assert var.defaultVal.getClass() == Boolean.class; + String val = conf.get(var.varname); + return val == null ? 
conf.getBoolean(var.hiveName, (Boolean)var.defaultVal) : Boolean.valueOf(val); + } + + /** + * Set the variable as a boolean + * @param conf configuration file to set it in + * @param var variable to set + * @param val value to set it to + */ + public static void setBoolVar(Configuration conf, ConfVars var, boolean val) { + assert var.defaultVal.getClass() == Boolean.class; + conf.setBoolean(var.varname, val); + } + + /** + * Get the variable as a double + * @param conf configuration to retrieve it from + * @param var variable to retrieve + * @return value, or default value if value not in config file + */ + public static double getDoubleVar(Configuration conf, ConfVars var) { + assert var.defaultVal.getClass() == Double.class; + String val = conf.get(var.varname); + return val == null ? conf.getDouble(var.hiveName, (Double)var.defaultVal) : Double.valueOf(val); + } + + /** + * Set the variable as a double + * @param conf configuration file to set it in + * @param var variable to set + * @param val value to set it to + */ + public static void setDoubleVar(Configuration conf, ConfVars var, double val) { + assert var.defaultVal.getClass() == Double.class; + conf.setDouble(var.varname, val); + } + + /** + * Get the variable as a size. The string value is read with + * {@link #getVar(Configuration, ConfVars)} and converted by SizeValidator.toSizeBytes + * (presumably to a byte count -- confirm against SizeValidator). + * @param conf configuration to retrieve it from + * @param var variable to retrieve + * @return result of SizeValidator.toSizeBytes on the configured, or default, string value + */ + public static long getSizeVar(Configuration conf, ConfVars var) { + return SizeValidator.toSizeBytes(getVar(conf, var)); + } + + /** + * Get a class instance based on a configuration value + * @param conf configuration file to retrieve it from + * @param var variable to retrieve + * @param defaultValue default class to return if the value isn't set + * @param xface interface that class must implement + * @param <I> interface that class implements + * @return instance of the class + */ + public static <I> Class<? extends I> getClass(Configuration conf, ConfVars var, + Class<? extends I> defaultValue, + Class<I> xface) { + assert var.defaultVal.getClass() == String.class; + String val = conf.get(var.varname); + return val == null ? 
conf.getClass(var.hiveName, defaultValue, xface) : + conf.getClass(var.varname, def
<TRUNCATED>
