http://git-wip-us.apache.org/repos/asf/hive/blob/37f05f41/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig
deleted file mode 100644
index b214344..0000000
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig
+++ /dev/null
@@ -1,3372 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.conf;
-
-import com.google.common.base.Joiner;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
-import org.apache.hadoop.hive.conf.Validator.PatternSet;
-import org.apache.hadoop.hive.conf.Validator.RangeValidator;
-import org.apache.hadoop.hive.conf.Validator.RatioValidator;
-import org.apache.hadoop.hive.conf.Validator.StringSet;
-import org.apache.hadoop.hive.conf.Validator.TimeValidator;
-import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.hive.shims.Utils;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.util.Shell;
-import org.apache.hive.common.HiveCompat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import javax.security.auth.login.LoginException;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.PrintStream;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Properties;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Hive Configuration.
- */
-public class HiveConf extends Configuration {
-  protected String hiveJar;
-  protected Properties origProp;
-  protected String auxJars;
-  private static final Logger l4j = LoggerFactory.getLogger(HiveConf.class);
-  private static boolean loadMetastoreConfig = false;
-  private static boolean loadHiveServer2Config = false;
-  private static URL hiveDefaultURL = null;
-  private static URL hiveSiteURL = null;
-  private static URL hivemetastoreSiteUrl = null;
-  private static URL hiveServer2SiteUrl = null;
-
-  private static byte[] confVarByteArray = null;
-
-
-  private static final Map<String, ConfVars> vars = new HashMap<String, ConfVars>();
-  private static final Map<String, ConfVars> metaConfs = new HashMap<String, ConfVars>();
-  private final List<String> restrictList = new ArrayList<String>();
-  private final Set<String> hiddenSet = new HashSet<String>();
-
-  private Pattern modWhiteListPattern = null;
-  private volatile boolean isSparkConfigUpdated = false;
-  private static final int LOG_PREFIX_LENGTH = 64;
-
-  public boolean getSparkConfigUpdated() {
-    return isSparkConfigUpdated;
-  }
-
-  public void setSparkConfigUpdated(boolean isSparkConfigUpdated) {
-    this.isSparkConfigUpdated = isSparkConfigUpdated;
-  }
-
-  static {
-    ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
-    if (classLoader == null) {
-      classLoader = HiveConf.class.getClassLoader();
-    }
-
-    hiveDefaultURL = classLoader.getResource("hive-default.xml");
-
-    // Look for hive-site.xml on the CLASSPATH and log its location if found.
-    hiveSiteURL = classLoader.getResource("hive-site.xml");
-    hivemetastoreSiteUrl = classLoader.getResource("hivemetastore-site.xml");
-    hiveServer2SiteUrl = classLoader.getResource("hiveserver2-site.xml");
-
-    for (ConfVars confVar : ConfVars.values()) {
-      vars.put(confVar.varname, confVar);
-    }
-  }
-
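A note on the static initializer above: hive-default.xml, hive-site.xml, hivemetastore-site.xml and hiveserver2-site.xml are all resolved through the thread context classloader, so whichever copy appears first on the CLASSPATH wins. A minimal sketch of the resulting overlay from a client's point of view (ConfProbe is an illustrative class name, not part of this patch; getVar is the real accessor):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class ConfProbe {
      public static void main(String[] args) {
        // ConfVars defaults are applied first; a hive-site.xml found on the
        // CLASSPATH by the static initializer then overrides them.
        HiveConf conf = new HiveConf();
        System.out.println(conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE));
      }
    }
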
-  /**
-   * Metastore related options that the db is initialized against. When a conf
-   * var in this list is changed, the metastore instance for the CLI will
-   * be recreated so that the change will take effect.
-   */
-  public static final HiveConf.ConfVars[] metaVars = {
-      HiveConf.ConfVars.METASTOREWAREHOUSE,
-      HiveConf.ConfVars.METASTOREURIS,
-      HiveConf.ConfVars.METASTORE_SERVER_PORT,
-      HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES,
-      HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES,
-      HiveConf.ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY,
-      HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT,
-      HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_LIFETIME,
-      HiveConf.ConfVars.METASTOREPWD,
-      HiveConf.ConfVars.METASTORECONNECTURLHOOK,
-      HiveConf.ConfVars.METASTORECONNECTURLKEY,
-      HiveConf.ConfVars.METASTORESERVERMINTHREADS,
-      HiveConf.ConfVars.METASTORESERVERMAXTHREADS,
-      HiveConf.ConfVars.METASTORE_TCP_KEEP_ALIVE,
-      HiveConf.ConfVars.METASTORE_INT_ORIGINAL,
-      HiveConf.ConfVars.METASTORE_INT_ARCHIVED,
-      HiveConf.ConfVars.METASTORE_INT_EXTRACTED,
-      HiveConf.ConfVars.METASTORE_KERBEROS_KEYTAB_FILE,
-      HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL,
-      HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL,
-      HiveConf.ConfVars.METASTORE_CACHE_PINOBJTYPES,
-      HiveConf.ConfVars.METASTORE_CONNECTION_POOLING_TYPE,
-      HiveConf.ConfVars.METASTORE_VALIDATE_TABLES,
-      HiveConf.ConfVars.METASTORE_VALIDATE_COLUMNS,
-      HiveConf.ConfVars.METASTORE_VALIDATE_CONSTRAINTS,
-      HiveConf.ConfVars.METASTORE_STORE_MANAGER_TYPE,
-      HiveConf.ConfVars.METASTORE_AUTO_CREATE_SCHEMA,
-      HiveConf.ConfVars.METASTORE_AUTO_START_MECHANISM_MODE,
-      HiveConf.ConfVars.METASTORE_TRANSACTION_ISOLATION,
-      HiveConf.ConfVars.METASTORE_CACHE_LEVEL2,
-      HiveConf.ConfVars.METASTORE_CACHE_LEVEL2_TYPE,
-      HiveConf.ConfVars.METASTORE_IDENTIFIER_FACTORY,
-      HiveConf.ConfVars.METASTORE_PLUGIN_REGISTRY_BUNDLE_CHECK,
-      HiveConf.ConfVars.METASTORE_AUTHORIZATION_STORAGE_AUTH_CHECKS,
-      HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX,
-      HiveConf.ConfVars.METASTORE_EVENT_LISTENERS,
-      HiveConf.ConfVars.METASTORE_EVENT_CLEAN_FREQ,
-      HiveConf.ConfVars.METASTORE_EVENT_EXPIRY_DURATION,
-      HiveConf.ConfVars.METASTORE_FILTER_HOOK,
-      HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL,
-      HiveConf.ConfVars.METASTORE_END_FUNCTION_LISTENERS,
-      HiveConf.ConfVars.METASTORE_PART_INHERIT_TBL_PROPS,
-      HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_OBJECTS_MAX,
-      HiveConf.ConfVars.METASTORE_INIT_HOOKS,
-      HiveConf.ConfVars.METASTORE_PRE_EVENT_LISTENERS,
-      HiveConf.ConfVars.HMSHANDLERATTEMPTS,
-      HiveConf.ConfVars.HMSHANDLERINTERVAL,
-      HiveConf.ConfVars.HMSHANDLERFORCERELOADCONF,
-      HiveConf.ConfVars.METASTORE_PARTITION_NAME_WHITELIST_PATTERN,
-      HiveConf.ConfVars.METASTORE_ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS,
-      HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES,
-      HiveConf.ConfVars.USERS_IN_ADMIN_ROLE,
-      HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
-      HiveConf.ConfVars.HIVE_TXN_MANAGER,
-      HiveConf.ConfVars.HIVE_TXN_TIMEOUT,
-      HiveConf.ConfVars.HIVE_TXN_MAX_OPEN_BATCH,
-      HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION,
-      HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_ENABLED,
-      HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_SIZE,
-      HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS,
-      HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_FPP,
-      HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_VARIANCE,
-      HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_TTL,
-      HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT,
-      HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_READER_WAIT,
-      HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_FULL,
-      HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_CLEAN_UNTIL,
-      HiveConf.ConfVars.METASTORE_FASTPATH,
-      HiveConf.ConfVars.METASTORE_HBASE_CATALOG_CACHE_SIZE,
-      HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_SIZE,
-      HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS,
-      HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_FALSE_POSITIVE_PROBABILITY,
-      HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_VARIANCE,
-      HiveConf.ConfVars.METASTORE_HBASE_CACHE_TIME_TO_LIVE,
-      HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_WRITER_WAIT,
-      HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_READER_WAIT,
-      HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_FULL,
-      HiveConf.ConfVars.METASTORE_HBASE_CACHE_CLEAN_UNTIL,
-      HiveConf.ConfVars.METASTORE_HBASE_CONNECTION_CLASS,
-      HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_CACHE_ENTRIES,
-      HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_MEMORY_TTL,
-      HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_INVALIDATOR_FREQUENCY,
-      HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_HBASE_TTL
-  };
-
-  /**
-   * User configurable Metastore vars
-   */
-  public static final HiveConf.ConfVars[] metaConfVars = {
-      HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL,
-      HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL_DDL,
-      HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT
-  };
-
-  static {
-    for (ConfVars confVar : metaConfVars) {
-      metaConfs.put(confVar.varname, confVar);
-    }
-  }
-
-  /**
-   * dbVars are the parameters that can be set per database. If these
-   * parameters are set as a database property, when switching to that
-   * database, the HiveConf variable will be changed. The change of these
-   * parameters will effectively change the DFS and MapReduce clusters
-   * for different databases.
-   */
-  public static final HiveConf.ConfVars[] dbVars = {
-      HiveConf.ConfVars.HADOOPBIN,
-      HiveConf.ConfVars.METASTOREWAREHOUSE,
-      HiveConf.ConfVars.SCRATCHDIR
-  };
-
-  /**
-   * ConfVars.
-   *
-   * These are the default configuration properties for Hive. Each HiveConf
-   * object is initialized as follows:
-   *
-   * 1) Hadoop configuration properties are applied.
-   * 2) ConfVar properties with non-null values are overlayed.
-   * 3) hive-site.xml properties are overlayed.
-   *
-   * WARNING: think twice before adding any Hadoop configuration properties
-   * with non-null values to this list as they will override any values defined
-   * in the underlying Hadoop configuration.
-   */
-  public static enum ConfVars {
-    // QL execution stuff
-    SCRIPTWRAPPER("hive.exec.script.wrapper", null, ""),
-    PLAN("hive.exec.plan", "", ""),
-    PLAN_SERIALIZATION("hive.plan.serialization.format", "kryo",
-        "Query plan format serialization between client and task nodes. \n" +
-        "Two supported values are : kryo and javaXML. Kryo is default."),
-    STAGINGDIR("hive.exec.stagingdir", ".hive-staging",
-        "Directory name that will be created inside table locations in order to support HDFS encryption. " +
-        "This replaces ${hive.exec.scratchdir} for query results with the exception of read-only tables. " +
-        "In all cases ${hive.exec.scratchdir} is still used for other temporary files, such as job plans."),
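Every ConfVars constant registers itself in the static vars map under its varname, which is what lets string property names be mapped back to their typed descriptor. A short sketch, assuming the static HiveConf.getConfVars(String) lookup that backs onto that map (VarLookup is an illustrative class name):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class VarLookup {
      public static void main(String[] args) {
        // Returns null for properties Hive does not own.
        HiveConf.ConfVars v = HiveConf.getConfVars("hive.exec.scratchdir");
        if (v != null) {
          System.out.println(v.varname);  // prints "hive.exec.scratchdir"
        }
      }
    }
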
" + - "For each connecting user, an HDFS scratch dir: ${hive.exec.scratchdir}/<username> is created, " + - "with ${hive.scratch.dir.permission}."), - LOCALSCRATCHDIR("hive.exec.local.scratchdir", - "${system:java.io.tmpdir}" + File.separator + "${system:user.name}", - "Local scratch space for Hive jobs"), - DOWNLOADED_RESOURCES_DIR("hive.downloaded.resources.dir", - "${system:java.io.tmpdir}" + File.separator + "${hive.session.id}_resources", - "Temporary local directory for added resources in the remote file system."), - SCRATCHDIRPERMISSION("hive.scratch.dir.permission", "700", - "The permission for the user specific scratch directories that get created."), - SUBMITVIACHILD("hive.exec.submitviachild", false, ""), - SUBMITLOCALTASKVIACHILD("hive.exec.submit.local.task.via.child", true, - "Determines whether local tasks (typically mapjoin hashtable generation phase) runs in \n" + - "separate JVM (true recommended) or not. \n" + - "Avoids the overhead of spawning new JVM, but can lead to out-of-memory issues."), - SCRIPTERRORLIMIT("hive.exec.script.maxerrsize", 100000, - "Maximum number of bytes a script is allowed to emit to standard error (per map-reduce task). \n" + - "This prevents runaway scripts from filling logs partitions to capacity"), - ALLOWPARTIALCONSUMP("hive.exec.script.allow.partial.consumption", false, - "When enabled, this option allows a user script to exit successfully without consuming \n" + - "all the data from the standard input."), - STREAMREPORTERPERFIX("stream.stderr.reporter.prefix", "reporter:", - "Streaming jobs that log to standard error with this prefix can log counter or status information."), - STREAMREPORTERENABLED("stream.stderr.reporter.enabled", true, - "Enable consumption of status and counter messages for streaming jobs."), - COMPRESSRESULT("hive.exec.compress.output", false, - "This controls whether the final outputs of a query (to a local/HDFS file or a Hive table) is compressed. \n" + - "The compression codec and other options are determined from Hadoop config variables mapred.output.compress*"), - COMPRESSINTERMEDIATE("hive.exec.compress.intermediate", false, - "This controls whether intermediate files produced by Hive between multiple map-reduce jobs are compressed. \n" + - "The compression codec and other options are determined from Hadoop config variables mapred.output.compress*"), - COMPRESSINTERMEDIATECODEC("hive.intermediate.compression.codec", "", ""), - COMPRESSINTERMEDIATETYPE("hive.intermediate.compression.type", "", ""), - BYTESPERREDUCER("hive.exec.reducers.bytes.per.reducer", (long) (256 * 1000 * 1000), - "size per reducer.The default is 256Mb, i.e if the input size is 1G, it will use 4 reducers."), - MAXREDUCERS("hive.exec.reducers.max", 1009, - "max number of reducers will be used. If the one specified in the configuration parameter mapred.reduce.tasks is\n" + - "negative, Hive will use this one as the max number of reducers when automatically determine number of reducers."), - PREEXECHOOKS("hive.exec.pre.hooks", "", - "Comma-separated list of pre-execution hooks to be invoked for each statement. \n" + - "A pre-execution hook is specified as the name of a Java class which implements the \n" + - "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."), - POSTEXECHOOKS("hive.exec.post.hooks", "", - "Comma-separated list of post-execution hooks to be invoked for each statement. 
\n" + - "A post-execution hook is specified as the name of a Java class which implements the \n" + - "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."), - ONFAILUREHOOKS("hive.exec.failure.hooks", "", - "Comma-separated list of on-failure hooks to be invoked for each statement. \n" + - "An on-failure hook is specified as the name of Java class which implements the \n" + - "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."), - QUERYREDACTORHOOKS("hive.exec.query.redactor.hooks", "", - "Comma-separated list of hooks to be invoked for each query which can \n" + - "tranform the query before it's placed in the job.xml file. Must be a Java class which \n" + - "extends from the org.apache.hadoop.hive.ql.hooks.Redactor abstract class."), - CLIENTSTATSPUBLISHERS("hive.client.stats.publishers", "", - "Comma-separated list of statistics publishers to be invoked on counters on each job. \n" + - "A client stats publisher is specified as the name of a Java class which implements the \n" + - "org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface."), - EXECPARALLEL("hive.exec.parallel", false, "Whether to execute jobs in parallel"), - EXECPARALLETHREADNUMBER("hive.exec.parallel.thread.number", 8, - "How many jobs at most can be executed in parallel"), - HIVESPECULATIVEEXECREDUCERS("hive.mapred.reduce.tasks.speculative.execution", true, - "Whether speculative execution for reducers should be turned on. "), - HIVECOUNTERSPULLINTERVAL("hive.exec.counters.pull.interval", 1000L, - "The interval with which to poll the JobTracker for the counters the running job. \n" + - "The smaller it is the more load there will be on the jobtracker, the higher it is the less granular the caught will be."), - DYNAMICPARTITIONING("hive.exec.dynamic.partition", true, - "Whether or not to allow dynamic partitions in DML/DDL."), - DYNAMICPARTITIONINGMODE("hive.exec.dynamic.partition.mode", "strict", - "In strict mode, the user must specify at least one static partition\n" + - "in case the user accidentally overwrites all partitions.\n" + - "In nonstrict mode all partitions are allowed to be dynamic."), - DYNAMICPARTITIONMAXPARTS("hive.exec.max.dynamic.partitions", 1000, - "Maximum number of dynamic partitions allowed to be created in total."), - DYNAMICPARTITIONMAXPARTSPERNODE("hive.exec.max.dynamic.partitions.pernode", 100, - "Maximum number of dynamic partitions allowed to be created in each mapper/reducer node."), - MAXCREATEDFILES("hive.exec.max.created.files", 100000L, - "Maximum number of HDFS files created by all mappers/reducers in a MapReduce job."), - DEFAULTPARTITIONNAME("hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__", - "The default partition name in case the dynamic partition column value is null/empty string or any other values that cannot be escaped. \n" + - "This value must not contain any special character used in HDFS URI (e.g., ':', '%', '/' etc). 
\n" + - "The user has to be aware that the dynamic partition value should not contain this value to avoid confusions."), - DEFAULT_ZOOKEEPER_PARTITION_NAME("hive.lockmgr.zookeeper.default.partition.name", "__HIVE_DEFAULT_ZOOKEEPER_PARTITION__", ""), - - // Whether to show a link to the most failed task + debugging tips - SHOW_JOB_FAIL_DEBUG_INFO("hive.exec.show.job.failure.debug.info", true, - "If a job fails, whether to provide a link in the CLI to the task with the\n" + - "most failures, along with debugging hints if applicable."), - JOB_DEBUG_CAPTURE_STACKTRACES("hive.exec.job.debug.capture.stacktraces", true, - "Whether or not stack traces parsed from the task logs of a sampled failed task \n" + - "for each failed job should be stored in the SessionState"), - JOB_DEBUG_TIMEOUT("hive.exec.job.debug.timeout", 30000, ""), - TASKLOG_DEBUG_TIMEOUT("hive.exec.tasklog.debug.timeout", 20000, ""), - OUTPUT_FILE_EXTENSION("hive.output.file.extension", null, - "String used as a file extension for output files. \n" + - "If not set, defaults to the codec extension for text files (e.g. \".gz\"), or no extension otherwise."), - - HIVE_IN_TEST("hive.in.test", false, "internal usage only, true in test mode", true), - - HIVE_IN_TEZ_TEST("hive.in.tez.test", false, "internal use only, true when in testing tez", - true), - - LOCALMODEAUTO("hive.exec.mode.local.auto", false, - "Let Hive determine whether to run in local mode automatically"), - LOCALMODEMAXBYTES("hive.exec.mode.local.auto.inputbytes.max", 134217728L, - "When hive.exec.mode.local.auto is true, input bytes should less than this for local mode."), - LOCALMODEMAXINPUTFILES("hive.exec.mode.local.auto.input.files.max", 4, - "When hive.exec.mode.local.auto is true, the number of tasks should less than this for local mode."), - - DROPIGNORESNONEXISTENT("hive.exec.drop.ignorenonexistent", true, - "Do not report an error if DROP TABLE/VIEW/Index/Function specifies a non-existent table/view/index/function"), - - HIVEIGNOREMAPJOINHINT("hive.ignore.mapjoin.hint", true, "Ignore the mapjoin hint"), - - HIVE_FILE_MAX_FOOTER("hive.file.max.footer", 100, - "maximum number of lines for footer user can define for a table file"), - - HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES("hive.resultset.use.unique.column.names", true, - "Make column names unique in the result set by qualifying column names with table alias if needed.\n" + - "Table alias will be added to column names for queries of type \"select *\" or \n" + - "if query explicitly uses table alias \"select r1.x..\"."), - - // Hadoop Configuration Properties - // Properties with null values are ignored and exist only for the purpose of giving us - // a symbolic name to reference in the Hive source code. Properties with non-null - // values will override any values set in the underlying Hadoop configuration. - HADOOPBIN("hadoop.bin.path", findHadoopBinary(), "", true), - HIVE_FS_HAR_IMPL("fs.har.impl", "org.apache.hadoop.hive.shims.HiveHarFileSystem", - "The implementation for accessing Hadoop Archives. 
Note that this won't be applicable to Hadoop versions less than 0.20"), - HADOOPFS(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPFS"), null, "", true), - HADOOPMAPFILENAME(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPMAPFILENAME"), null, "", true), - HADOOPMAPREDINPUTDIR(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPMAPREDINPUTDIR"), null, "", true), - HADOOPMAPREDINPUTDIRRECURSIVE(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPMAPREDINPUTDIRRECURSIVE"), false, "", true), - MAPREDMAXSPLITSIZE(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), 256000000L, "", true), - MAPREDMINSPLITSIZE(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), 1L, "", true), - MAPREDMINSPLITSIZEPERNODE(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZEPERNODE"), 1L, "", true), - MAPREDMINSPLITSIZEPERRACK(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZEPERRACK"), 1L, "", true), - // The number of reduce tasks per job. Hadoop sets this value to 1 by default - // By setting this property to -1, Hive will automatically determine the correct - // number of reducers. - HADOOPNUMREDUCERS(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPNUMREDUCERS"), -1, "", true), - HADOOPJOBNAME(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPJOBNAME"), null, "", true), - HADOOPSPECULATIVEEXECREDUCERS(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPSPECULATIVEEXECREDUCERS"), true, "", true), - MAPREDSETUPCLEANUPNEEDED(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDSETUPCLEANUPNEEDED"), false, "", true), - MAPREDTASKCLEANUPNEEDED(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDTASKCLEANUPNEEDED"), false, "", true), - - // Metastore stuff. Be sure to update HiveConf.metaVars when you add something here! - METASTOREWAREHOUSE("hive.metastore.warehouse.dir", "/user/hive/warehouse", - "location of default database for the warehouse"), - METASTOREURIS("hive.metastore.uris", "", - "Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore."), - - METASTORE_FASTPATH("hive.metastore.fastpath", false, - "Used to avoid all of the proxies and object copies in the metastore. Note, if this is " + - "set, you MUST use a local metastore (hive.metastore.uris must be empty) otherwise " + - "undefined and most likely undesired behavior will result"), - METASTORE_HBASE_CATALOG_CACHE_SIZE("hive.metastore.hbase.catalog.cache.size", 50000, "Maximum number of " + - "objects we will place in the hbase metastore catalog cache. 
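The METASTOREURIS and METASTORE_FASTPATH entries above impose one hard rule on each other: fastpath only works with a local metastore, i.e. an empty hive.metastore.uris. A hedged sketch of the two client modes (the host name is a placeholder; setVar and setBoolVar are the real mutators):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class MetastoreModes {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Remote metastore: clients connect over Thrift.
        conf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://ms-host.example.com:9083");
        // Embedded metastore: leave the URI empty; only then may fastpath be set.
        // conf.setVar(HiveConf.ConfVars.METASTOREURIS, "");
        // conf.setBoolVar(HiveConf.ConfVars.METASTORE_FASTPATH, true);
      }
    }
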
The objects will be divided up by " + - "types that we need to cache."), - METASTORE_HBASE_AGGREGATE_STATS_CACHE_SIZE("hive.metastore.hbase.aggregate.stats.cache.size", 10000, - "Maximum number of aggregate stats nodes that we will place in the hbase metastore aggregate stats cache."), - METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS("hive.metastore.hbase.aggregate.stats.max.partitions", 10000, - "Maximum number of partitions that are aggregated per cache node."), - METASTORE_HBASE_AGGREGATE_STATS_CACHE_FALSE_POSITIVE_PROBABILITY("hive.metastore.hbase.aggregate.stats.false.positive.probability", - (float) 0.01, "Maximum false positive probability for the Bloom Filter used in each aggregate stats cache node (default 1%)."), - METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_VARIANCE("hive.metastore.hbase.aggregate.stats.max.variance", (float) 0.1, - "Maximum tolerable variance in number of partitions between a cached node and our request (default 10%)."), - METASTORE_HBASE_CACHE_TIME_TO_LIVE("hive.metastore.hbase.cache.ttl", "600s", new TimeValidator(TimeUnit.SECONDS), - "Number of seconds for a cached node to be active in the cache before they become stale."), - METASTORE_HBASE_CACHE_MAX_WRITER_WAIT("hive.metastore.hbase.cache.max.writer.wait", "5000ms", new TimeValidator(TimeUnit.MILLISECONDS), - "Number of milliseconds a writer will wait to acquire the writelock before giving up."), - METASTORE_HBASE_CACHE_MAX_READER_WAIT("hive.metastore.hbase.cache.max.reader.wait", "1000ms", new TimeValidator(TimeUnit.MILLISECONDS), - "Number of milliseconds a reader will wait to acquire the readlock before giving up."), - METASTORE_HBASE_CACHE_MAX_FULL("hive.metastore.hbase.cache.max.full", (float) 0.9, - "Maximum cache full % after which the cache cleaner thread kicks in."), - METASTORE_HBASE_CACHE_CLEAN_UNTIL("hive.metastore.hbase.cache.clean.until", (float) 0.8, - "The cleaner thread cleans until cache reaches this % full size."), - METASTORE_HBASE_CONNECTION_CLASS("hive.metastore.hbase.connection.class", - "org.apache.hadoop.hive.metastore.hbase.VanillaHBaseConnection", - "Class used to connection to HBase"), - METASTORE_HBASE_AGGR_STATS_CACHE_ENTRIES("hive.metastore.hbase.aggr.stats.cache.entries", - 10000, "How many in stats objects to cache in memory"), - METASTORE_HBASE_AGGR_STATS_MEMORY_TTL("hive.metastore.hbase.aggr.stats.memory.ttl", "60s", - new TimeValidator(TimeUnit.SECONDS), - "Number of seconds stats objects live in memory after they are read from HBase."), - METASTORE_HBASE_AGGR_STATS_INVALIDATOR_FREQUENCY( - "hive.metastore.hbase.aggr.stats.invalidator.frequency", "5s", - new TimeValidator(TimeUnit.SECONDS), - "How often the stats cache scans its HBase entries and looks for expired entries"), - METASTORE_HBASE_AGGR_STATS_HBASE_TTL("hive.metastore.hbase.aggr.stats.hbase.ttl", "604800s", - new TimeValidator(TimeUnit.SECONDS), - "Number of seconds stats entries live in HBase cache after they are created. They may be" + - " invalided by updates or partition drops before this. 
Default is one week."), - - METASTORETHRIFTCONNECTIONRETRIES("hive.metastore.connect.retries", 3, - "Number of retries while opening a connection to metastore"), - METASTORETHRIFTFAILURERETRIES("hive.metastore.failure.retries", 1, - "Number of retries upon failure of Thrift metastore calls"), - METASTORE_SERVER_PORT("hive.metastore.port", 9083, "Hive metastore listener port"), - METASTORE_CLIENT_CONNECT_RETRY_DELAY("hive.metastore.client.connect.retry.delay", "1s", - new TimeValidator(TimeUnit.SECONDS), - "Number of seconds for the client to wait between consecutive connection attempts"), - METASTORE_CLIENT_SOCKET_TIMEOUT("hive.metastore.client.socket.timeout", "600s", - new TimeValidator(TimeUnit.SECONDS), - "MetaStore Client socket timeout in seconds"), - METASTORE_CLIENT_SOCKET_LIFETIME("hive.metastore.client.socket.lifetime", "0s", - new TimeValidator(TimeUnit.SECONDS), - "MetaStore Client socket lifetime in seconds. After this time is exceeded, client\n" + - "reconnects on the next MetaStore operation. A value of 0s means the connection\n" + - "has an infinite lifetime."), - METASTOREPWD("javax.jdo.option.ConnectionPassword", "mine", - "password to use against metastore database"), - METASTORECONNECTURLHOOK("hive.metastore.ds.connection.url.hook", "", - "Name of the hook to use for retrieving the JDO connection URL. If empty, the value in javax.jdo.option.ConnectionURL is used"), - METASTOREMULTITHREADED("javax.jdo.option.Multithreaded", true, - "Set this to true if multiple threads access metastore through JDO concurrently."), - METASTORECONNECTURLKEY("javax.jdo.option.ConnectionURL", - "jdbc:derby:;databaseName=metastore_db;create=true", - "JDBC connect string for a JDBC metastore"), - HMSHANDLERATTEMPTS("hive.hmshandler.retry.attempts", 10, - "The number of times to retry a HMSHandler call if there were a connection error."), - HMSHANDLERINTERVAL("hive.hmshandler.retry.interval", "2000ms", - new TimeValidator(TimeUnit.MILLISECONDS), "The time between HMSHandler retry attempts on failure."), - HMSHANDLERFORCERELOADCONF("hive.hmshandler.force.reload.conf", false, - "Whether to force reloading of the HMSHandler configuration (including\n" + - "the connection URL, before the next metastore query that accesses the\n" + - "datastore. Once reloaded, this value is reset to false. Used for\n" + - "testing only."), - METASTORESERVERMAXMESSAGESIZE("hive.metastore.server.max.message.size", 100*1024*1024, - "Maximum message size in bytes a HMS will accept."), - METASTORESERVERMINTHREADS("hive.metastore.server.min.threads", 200, - "Minimum number of worker threads in the Thrift server's pool."), - METASTORESERVERMAXTHREADS("hive.metastore.server.max.threads", 1000, - "Maximum number of worker threads in the Thrift server's pool."), - METASTORE_TCP_KEEP_ALIVE("hive.metastore.server.tcp.keepalive", true, - "Whether to enable TCP keepalive for the metastore server. Keepalive will prevent accumulation of half-open connections."), - - METASTORE_INT_ORIGINAL("hive.metastore.archive.intermediate.original", - "_INTERMEDIATE_ORIGINAL", - "Intermediate dir suffixes used for archiving. 
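All of the duration-typed entries above carry a TimeValidator and accept a unit suffix ("600s", "5000ms", "604800s"). Callers read them through getTimeVar, which converts into whatever unit they ask for; a short sketch (TimeoutProbe is an illustrative class name):

    import java.util.concurrent.TimeUnit;
    import org.apache.hadoop.hive.conf.HiveConf;

    public class TimeoutProbe {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // The "600s" default comes back here as 600000 milliseconds.
        long socketTimeoutMs = conf.getTimeVar(
            HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT, TimeUnit.MILLISECONDS);
        System.out.println(socketTimeoutMs);
      }
    }
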
-    METASTORE_INT_ORIGINAL("hive.metastore.archive.intermediate.original",
-        "_INTERMEDIATE_ORIGINAL",
-        "Intermediate dir suffixes used for archiving. Not important what they\n" +
-        "are, as long as collisions are avoided"),
-    METASTORE_INT_ARCHIVED("hive.metastore.archive.intermediate.archived",
-        "_INTERMEDIATE_ARCHIVED", ""),
-    METASTORE_INT_EXTRACTED("hive.metastore.archive.intermediate.extracted",
-        "_INTERMEDIATE_EXTRACTED", ""),
-    METASTORE_KERBEROS_KEYTAB_FILE("hive.metastore.kerberos.keytab.file", "",
-        "The path to the Kerberos Keytab file containing the metastore Thrift server's service principal."),
-    METASTORE_KERBEROS_PRINCIPAL("hive.metastore.kerberos.principal",
-        "hive-metastore/_HOST@EXAMPLE.COM",
-        "The service principal for the metastore Thrift server. \n" +
-        "The special string _HOST will be replaced automatically with the correct host name."),
-    METASTORE_USE_THRIFT_SASL("hive.metastore.sasl.enabled", false,
-        "If true, the metastore Thrift interface will be secured with SASL. Clients must authenticate with Kerberos."),
-    METASTORE_USE_THRIFT_FRAMED_TRANSPORT("hive.metastore.thrift.framed.transport.enabled", false,
-        "If true, the metastore Thrift interface will use TFramedTransport. When false (default) a standard TTransport is used."),
-    METASTORE_USE_THRIFT_COMPACT_PROTOCOL("hive.metastore.thrift.compact.protocol.enabled", false,
-        "If true, the metastore Thrift interface will use TCompactProtocol. When false (default) TBinaryProtocol will be used.\n" +
-        "Setting it to true will break compatibility with older clients running TBinaryProtocol."),
-    METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_CLS("hive.cluster.delegation.token.store.class",
-        "org.apache.hadoop.hive.thrift.MemoryTokenStore",
-        "The delegation token store implementation. Set to org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced cluster."),
-    METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_CONNECTSTR(
-        "hive.cluster.delegation.token.store.zookeeper.connectString", "",
-        "The ZooKeeper token store connect string. You can re-use the configuration value\n" +
-        "set in hive.zookeeper.quorum, by leaving this parameter unset."),
-    METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_ZNODE(
-        "hive.cluster.delegation.token.store.zookeeper.znode", "/hivedelegation",
-        "The root path for token store data. Note that this is used by both HiveServer2 and\n" +
-        "MetaStore to store delegation Token. One directory gets created for each of them.\n" +
-        "The final directory names would have the servername appended to it (HIVESERVER2,\n" +
-        "METASTORE)."),
-    METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_ACL(
-        "hive.cluster.delegation.token.store.zookeeper.acl", "",
-        "ACL for token store entries. Comma separated list of ACL entries. For example:\n" +
-        "sasl:hive/host1@MY.DOMAIN:cdrwa,sasl:hive/host2@MY.DOMAIN:cdrwa\n" +
-        "Defaults to all permissions for the hiveserver2/metastore process user."),
-    METASTORE_CACHE_PINOBJTYPES("hive.metastore.cache.pinobjtypes", "Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order",
-        "List of comma separated metastore object types that should be pinned in the cache"),
-    METASTORE_CONNECTION_POOLING_TYPE("datanucleus.connectionPoolingType", "BONECP",
-        "Specify connection pool library for datanucleus"),
-    METASTORE_VALIDATE_TABLES("datanucleus.validateTables", false,
-        "validates existing schema against code. Turn this on if you want to verify existing schema"),
-    METASTORE_VALIDATE_COLUMNS("datanucleus.validateColumns", false,
-        "validates existing schema against code. Turn this on if you want to verify existing schema"),
-    METASTORE_VALIDATE_CONSTRAINTS("datanucleus.validateConstraints", false,
-        "validates existing schema against code. Turn this on if you want to verify existing schema"),
-    METASTORE_STORE_MANAGER_TYPE("datanucleus.storeManagerType", "rdbms", "metadata store type"),
-    METASTORE_AUTO_CREATE_SCHEMA("datanucleus.autoCreateSchema", true,
-        "creates necessary schema on a startup if one doesn't exist. Set this to false after creating it once"),
-    METASTORE_FIXED_DATASTORE("datanucleus.fixedDatastore", false, ""),
-    METASTORE_SCHEMA_VERIFICATION("hive.metastore.schema.verification", false,
-        "Enforce metastore schema version consistency.\n" +
-        "True: Verify that version information stored in metastore matches with one from Hive jars. Also disable automatic\n" +
-        " schema migration attempt. Users are required to manually migrate schema after Hive upgrade which ensures\n" +
-        " proper metastore schema migration. (Default)\n" +
-        "False: Warn if the version information stored in metastore doesn't match with one from in Hive jars."),
-    METASTORE_SCHEMA_VERIFICATION_RECORD_VERSION("hive.metastore.schema.verification.record.version", true,
-        "When true the current MS version is recorded in the VERSION table. If this is disabled and verification is\n" +
-        " enabled the MS will be unusable."),
-    METASTORE_AUTO_START_MECHANISM_MODE("datanucleus.autoStartMechanismMode", "checked",
-        "throw exception if metadata tables are incorrect"),
-    METASTORE_TRANSACTION_ISOLATION("datanucleus.transactionIsolation", "read-committed",
-        "Default transaction isolation level for identity generation."),
-    METASTORE_CACHE_LEVEL2("datanucleus.cache.level2", false,
-        "Use a level 2 cache. Turn this off if metadata is changed independently of Hive metastore server"),
-    METASTORE_CACHE_LEVEL2_TYPE("datanucleus.cache.level2.type", "none", ""),
-    METASTORE_IDENTIFIER_FACTORY("datanucleus.identifierFactory", "datanucleus1",
-        "Name of the identifier factory to use when generating table/column names etc. \n" +
-        "'datanucleus1' is used for backward compatibility with DataNucleus v1"),
-    METASTORE_USE_LEGACY_VALUE_STRATEGY("datanucleus.rdbms.useLegacyNativeValueStrategy", true, ""),
-    METASTORE_PLUGIN_REGISTRY_BUNDLE_CHECK("datanucleus.plugin.pluginRegistryBundleCheck", "LOG",
-        "Defines what happens when plugin bundles are found and are duplicated [EXCEPTION|LOG|NONE]"),
-    METASTORE_BATCH_RETRIEVE_MAX("hive.metastore.batch.retrieve.max", 300,
-        "Maximum number of objects (tables/partitions) that can be retrieved from metastore in one batch. \n" +
-        "The higher the number, the less the number of round trips is needed to the Hive metastore server, \n" +
-        "but it may also cause higher memory requirement at the client side."),
-    METASTORE_BATCH_RETRIEVE_OBJECTS_MAX(
-        "hive.metastore.batch.retrieve.table.partition.max", 1000,
-        "Maximum number of objects that metastore internally retrieves in one batch."),
-
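Tying the Kerberos entries above together, a minimal sketch of a SASL-secured metastore client configuration (the keytab path is a placeholder; _HOST substitution happens as described in METASTORE_KERBEROS_PRINCIPAL):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class SecureMetastoreConf {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        conf.setBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL, true);
        conf.setVar(HiveConf.ConfVars.METASTORE_KERBEROS_KEYTAB_FILE,
            "/etc/security/keytabs/hive.service.keytab");  // placeholder path
        conf.setVar(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL,
            "hive-metastore/_HOST@EXAMPLE.COM");           // _HOST is substituted
      }
    }
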
\n" + - "An init hook is specified as the name of Java class which extends org.apache.hadoop.hive.metastore.MetaStoreInitListener."), - METASTORE_PRE_EVENT_LISTENERS("hive.metastore.pre.event.listeners", "", - "List of comma separated listeners for metastore events."), - METASTORE_EVENT_LISTENERS("hive.metastore.event.listeners", "", ""), - METASTORE_EVENT_DB_LISTENER_TTL("hive.metastore.event.db.listener.timetolive", "86400s", - new TimeValidator(TimeUnit.SECONDS), - "time after which events will be removed from the database listener queue"), - METASTORE_AUTHORIZATION_STORAGE_AUTH_CHECKS("hive.metastore.authorization.storage.checks", false, - "Should the metastore do authorization checks against the underlying storage (usually hdfs) \n" + - "for operations like drop-partition (disallow the drop-partition if the user in\n" + - "question doesn't have permissions to delete the corresponding directory\n" + - "on the storage)."), - METASTORE_EVENT_CLEAN_FREQ("hive.metastore.event.clean.freq", "0s", - new TimeValidator(TimeUnit.SECONDS), - "Frequency at which timer task runs to purge expired events in metastore."), - METASTORE_EVENT_EXPIRY_DURATION("hive.metastore.event.expiry.duration", "0s", - new TimeValidator(TimeUnit.SECONDS), - "Duration after which events expire from events table"), - METASTORE_EXECUTE_SET_UGI("hive.metastore.execute.setugi", true, - "In unsecure mode, setting this property to true will cause the metastore to execute DFS operations using \n" + - "the client's reported user and group permissions. Note that this property must be set on \n" + - "both the client and server sides. Further note that its best effort. \n" + - "If client sets its to true and server sets it to false, client setting will be ignored."), - METASTORE_PARTITION_NAME_WHITELIST_PATTERN("hive.metastore.partition.name.whitelist.pattern", "", - "Partition names will be checked against this regex pattern and rejected if not matched."), - - METASTORE_INTEGER_JDO_PUSHDOWN("hive.metastore.integral.jdo.pushdown", false, - "Allow JDO query pushdown for integral partition columns in metastore. Off by default. This\n" + - "improves metastore perf for integral columns, especially if there's a large number of partitions.\n" + - "However, it doesn't work correctly with integral values that are not normalized (e.g. have\n" + - "leading zeroes, like 0012). If metastore direct SQL is enabled and works, this optimization\n" + - "is also irrelevant."), - METASTORE_TRY_DIRECT_SQL("hive.metastore.try.direct.sql", true, - "Whether the Hive metastore should try to use direct SQL queries instead of the\n" + - "DataNucleus for certain read paths. This can improve metastore performance when\n" + - "fetching many partitions or column statistics by orders of magnitude; however, it\n" + - "is not guaranteed to work on all RDBMS-es and all versions. In case of SQL failures,\n" + - "the metastore will fall back to the DataNucleus, so it's safe even if SQL doesn't\n" + - "work for all queries on your datastore. If all SQL queries fail (for example, your\n" + - "metastore is backed by MongoDB), you might want to disable this to save the\n" + - "try-and-fall-back cost."), - METASTORE_DIRECT_SQL_PARTITION_BATCH_SIZE("hive.metastore.direct.sql.batch.size", 0, - "Batch size for partition and other object retrieval from the underlying DB in direct\n" + - "SQL. For some DBs like Oracle and MSSQL, there are hardcoded or perf-based limitations\n" + - "that necessitate this. 
For DBs that can handle the queries, this isn't necessary and\n" + - "may impede performance. -1 means no batching, 0 means automatic batching."), - METASTORE_TRY_DIRECT_SQL_DDL("hive.metastore.try.direct.sql.ddl", true, - "Same as hive.metastore.try.direct.sql, for read statements within a transaction that\n" + - "modifies metastore data. Due to non-standard behavior in Postgres, if a direct SQL\n" + - "select query has incorrect syntax or something similar inside a transaction, the\n" + - "entire transaction will fail and fall-back to DataNucleus will not be possible. You\n" + - "should disable the usage of direct SQL inside transactions if that happens in your case."), - METASTORE_ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS("hive.metastore.orm.retrieveMapNullsAsEmptyStrings",false, - "Thrift does not support nulls in maps, so any nulls present in maps retrieved from ORM must " + - "either be pruned or converted to empty strings. Some backing dbs such as Oracle persist empty strings " + - "as nulls, so we should set this parameter if we wish to reverse that behaviour. For others, " + - "pruning is the correct behaviour"), - METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES( - "hive.metastore.disallow.incompatible.col.type.changes", false, - "If true (default is false), ALTER TABLE operations which change the type of a\n" + - "column (say STRING) to an incompatible type (say MAP) are disallowed.\n" + - "RCFile default SerDe (ColumnarSerDe) serializes the values in such a way that the\n" + - "datatypes can be converted from string to any type. The map is also serialized as\n" + - "a string, which can be read as a string as well. However, with any binary\n" + - "serialization, this is not true. Blocking the ALTER TABLE prevents ClassCastExceptions\n" + - "when subsequently trying to access old partitions.\n" + - "\n" + - "Primitive types like INT, STRING, BIGINT, etc., are compatible with each other and are\n" + - "not blocked.\n" + - "\n" + - "See HIVE-4409 for more details."), - - NEWTABLEDEFAULTPARA("hive.table.parameters.default", "", - "Default property values for newly created tables"), - DDL_CTL_PARAMETERS_WHITELIST("hive.ddl.createtablelike.properties.whitelist", "", - "Table Properties to copy over when executing a Create Table Like."), - METASTORE_RAW_STORE_IMPL("hive.metastore.rawstore.impl", "org.apache.hadoop.hive.metastore.ObjectStore", - "Name of the class that implements org.apache.hadoop.hive.metastore.rawstore interface. 
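The two direct-SQL switches above are deliberately separate because the DataNucleus fallback behaves differently inside transactions. A sketch of the Postgres-safe combination the METASTORE_TRY_DIRECT_SQL_DDL description recommends (DirectSqlConf is an illustrative class name):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class DirectSqlConf {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Plain reads: try direct SQL, fall back to DataNucleus on failure.
        conf.setBoolVar(HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL, true);
        // Reads inside metadata-modifying transactions: no clean fallback on
        // Postgres, so disable direct SQL there if failures are observed.
        conf.setBoolVar(HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL_DDL, false);
      }
    }
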
\n" + - "This class is used to store and retrieval of raw metadata objects such as table, database"), - METASTORE_CONNECTION_DRIVER("javax.jdo.option.ConnectionDriverName", "org.apache.derby.jdbc.EmbeddedDriver", - "Driver class name for a JDBC metastore"), - METASTORE_MANAGER_FACTORY_CLASS("javax.jdo.PersistenceManagerFactoryClass", - "org.datanucleus.api.jdo.JDOPersistenceManagerFactory", - "class implementing the jdo persistence"), - METASTORE_EXPRESSION_PROXY_CLASS("hive.metastore.expression.proxy", - "org.apache.hadoop.hive.ql.optimizer.ppr.PartitionExpressionForMetastore", ""), - METASTORE_DETACH_ALL_ON_COMMIT("javax.jdo.option.DetachAllOnCommit", true, - "Detaches all objects from session so that they can be used after transaction is committed"), - METASTORE_NON_TRANSACTIONAL_READ("javax.jdo.option.NonTransactionalRead", true, - "Reads outside of transactions"), - METASTORE_CONNECTION_USER_NAME("javax.jdo.option.ConnectionUserName", "APP", - "Username to use against metastore database"), - METASTORE_END_FUNCTION_LISTENERS("hive.metastore.end.function.listeners", "", - "List of comma separated listeners for the end of metastore functions."), - METASTORE_PART_INHERIT_TBL_PROPS("hive.metastore.partition.inherit.table.properties", "", - "List of comma separated keys occurring in table properties which will get inherited to newly created partitions. \n" + - "* implies all the keys will get inherited."), - METASTORE_FILTER_HOOK("hive.metastore.filter.hook", "org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl", - "Metastore hook class for filtering the metadata read results. If hive.security.authorization.manager" - + "is set to instance of HiveAuthorizerFactory, then this value is ignored."), - FIRE_EVENTS_FOR_DML("hive.metastore.dml.events", false, "If true, the metastore will be asked" + - " to fire events for DML operations"), - METASTORE_CLIENT_DROP_PARTITIONS_WITH_EXPRESSIONS("hive.metastore.client.drop.partitions.using.expressions", true, - "Choose whether dropping partitions with HCatClient pushes the partition-predicate to the metastore, " + - "or drops partitions iteratively"), - - METASTORE_AGGREGATE_STATS_CACHE_ENABLED("hive.metastore.aggregate.stats.cache.enabled", true, - "Whether aggregate stats caching is enabled or not."), - METASTORE_AGGREGATE_STATS_CACHE_SIZE("hive.metastore.aggregate.stats.cache.size", 10000, - "Maximum number of aggregate stats nodes that we will place in the metastore aggregate stats cache."), - METASTORE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS("hive.metastore.aggregate.stats.cache.max.partitions", 10000, - "Maximum number of partitions that are aggregated per cache node."), - METASTORE_AGGREGATE_STATS_CACHE_FPP("hive.metastore.aggregate.stats.cache.fpp", (float) 0.01, - "Maximum false positive probability for the Bloom Filter used in each aggregate stats cache node (default 1%)."), - METASTORE_AGGREGATE_STATS_CACHE_MAX_VARIANCE("hive.metastore.aggregate.stats.cache.max.variance", (float) 0.01, - "Maximum tolerable variance in number of partitions between a cached node and our request (default 1%)."), - METASTORE_AGGREGATE_STATS_CACHE_TTL("hive.metastore.aggregate.stats.cache.ttl", "600s", new TimeValidator(TimeUnit.SECONDS), - "Number of seconds for a cached node to be active in the cache before they become stale."), - METASTORE_AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT("hive.metastore.aggregate.stats.cache.max.writer.wait", "5000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "Number of milliseconds a writer will wait to acquire the 
writelock before giving up."), - METASTORE_AGGREGATE_STATS_CACHE_MAX_READER_WAIT("hive.metastore.aggregate.stats.cache.max.reader.wait", "1000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "Number of milliseconds a reader will wait to acquire the readlock before giving up."), - METASTORE_AGGREGATE_STATS_CACHE_MAX_FULL("hive.metastore.aggregate.stats.cache.max.full", (float) 0.9, - "Maximum cache full % after which the cache cleaner thread kicks in."), - METASTORE_AGGREGATE_STATS_CACHE_CLEAN_UNTIL("hive.metastore.aggregate.stats.cache.clean.until", (float) 0.8, - "The cleaner thread cleans until cache reaches this % full size."), - METASTORE_METRICS("hive.metastore.metrics.enabled", false, "Enable metrics on the metastore."), - - // Parameters for exporting metadata on table drop (requires the use of the) - // org.apache.hadoop.hive.ql.parse.MetaDataExportListener preevent listener - METADATA_EXPORT_LOCATION("hive.metadata.export.location", "", - "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" + - "it is the location to which the metadata will be exported. The default is an empty string, which results in the \n" + - "metadata being exported to the current user's home directory on HDFS."), - MOVE_EXPORTED_METADATA_TO_TRASH("hive.metadata.move.exported.metadata.to.trash", true, - "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" + - "this setting determines if the metadata that is exported will subsequently be moved to the user's trash directory \n" + - "alongside the dropped table data. This ensures that the metadata will be cleaned up along with the dropped table data."), - - // CLI - CLIIGNOREERRORS("hive.cli.errors.ignore", false, ""), - CLIPRINTCURRENTDB("hive.cli.print.current.db", false, - "Whether to include the current database in the Hive prompt."), - CLIPROMPT("hive.cli.prompt", "hive", - "Command line prompt configuration value. Other hiveconf can be used in this configuration value. 
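The max.full / clean.until pair above forms a high-water/low-water eviction band for the aggregate stats cache, so clean.until must stay below max.full. A sketch of a sanity check over those floats (the check itself is illustrative, not Hive code; getFloatVar is the real accessor):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class CacheBandCheck {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        float maxFull = conf.getFloatVar(
            HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_FULL);    // 0.9
        float cleanUntil = conf.getFloatVar(
            HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_CLEAN_UNTIL); // 0.8
        if (cleanUntil >= maxFull) {
          throw new IllegalStateException("clean.until must be below max.full");
        }
      }
    }
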
\n" + - "Variable substitution will only be invoked at the Hive CLI startup."), - CLIPRETTYOUTPUTNUMCOLS("hive.cli.pretty.output.num.cols", -1, - "The number of columns to use when formatting output generated by the DESCRIBE PRETTY table_name command.\n" + - "If the value of this property is -1, then Hive will use the auto-detected terminal width."), - - HIVE_METASTORE_FS_HANDLER_CLS("hive.metastore.fs.handler.class", "org.apache.hadoop.hive.metastore.HiveMetaStoreFsImpl", ""), - - // Things we log in the jobconf - - // session identifier - HIVESESSIONID("hive.session.id", "", ""), - // whether session is running in silent mode or not - HIVESESSIONSILENT("hive.session.silent", false, ""), - - HIVE_SESSION_HISTORY_ENABLED("hive.session.history.enabled", false, - "Whether to log Hive query, query plan, runtime statistics etc."), - - HIVEQUERYSTRING("hive.query.string", "", - "Query being executed (might be multiple per a session)"), - - HIVEQUERYID("hive.query.id", "", - "ID for query being executed (might be multiple per a session)"), - - HIVEJOBNAMELENGTH("hive.jobname.length", 50, "max jobname length"), - - // hive jar - HIVEJAR("hive.jar.path", "", - "The location of hive_cli.jar that is used when submitting jobs in a separate jvm."), - HIVEAUXJARS("hive.aux.jars.path", "", - "The location of the plugin jars that contain implementations of user defined functions and serdes."), - - // reloadable jars - HIVERELOADABLEJARS("hive.reloadable.aux.jars.path", "", - "Jars can be renewed by executing reload command. And these jars can be " - + "used as the auxiliary classes like creating a UDF or SerDe."), - - // hive added files and jars - HIVEADDEDFILES("hive.added.files.path", "", "This an internal parameter."), - HIVEADDEDJARS("hive.added.jars.path", "", "This an internal parameter."), - HIVEADDEDARCHIVES("hive.added.archives.path", "", "This an internal parameter."), - - HIVE_CURRENT_DATABASE("hive.current.database", "", "Database name used by current session. Internal usage only.", true), - - // for hive script operator - HIVES_AUTO_PROGRESS_TIMEOUT("hive.auto.progress.timeout", "0s", - new TimeValidator(TimeUnit.SECONDS), - "How long to run autoprogressor for the script/UDTF operators.\n" + - "Set to 0 for forever."), - HIVESCRIPTAUTOPROGRESS("hive.script.auto.progress", false, - "Whether Hive Transform/Map/Reduce Clause should automatically send progress information to TaskTracker \n" + - "to avoid the task getting killed because of inactivity. Hive sends progress information when the script is \n" + - "outputting to stderr. 
This option removes the need of periodically producing stderr messages, \n" + - "but users should be cautious because this may prevent infinite loops in the scripts to be killed by TaskTracker."), - HIVESCRIPTIDENVVAR("hive.script.operator.id.env.var", "HIVE_SCRIPT_OPERATOR_ID", - "Name of the environment variable that holds the unique script operator ID in the user's \n" + - "transform function (the custom mapper/reducer that the user has specified in the query)"), - HIVESCRIPTTRUNCATEENV("hive.script.operator.truncate.env", false, - "Truncate each environment variable for external script in scripts operator to 20KB (to fit system limits)"), - HIVESCRIPT_ENV_BLACKLIST("hive.script.operator.env.blacklist", - "hive.txn.valid.txns,hive.script.operator.env.blacklist", - "Comma separated list of keys from the configuration file not to convert to environment " + - "variables when envoking the script operator"), - HIVEMAPREDMODE("hive.mapred.mode", "nonstrict", - "The mode in which the Hive operations are being performed. \n" + - "In strict mode, some risky queries are not allowed to run. They include:\n" + - " Cartesian Product.\n" + - " No partition being picked up for a query.\n" + - " Comparing bigints and strings.\n" + - " Comparing bigints and doubles.\n" + - " Orderby without limit."), - HIVEALIAS("hive.alias", "", ""), - HIVEMAPSIDEAGGREGATE("hive.map.aggr", true, "Whether to use map-side aggregation in Hive Group By queries"), - HIVEGROUPBYSKEW("hive.groupby.skewindata", false, "Whether there is skew in data to optimize group by queries"), - HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000, - "How many rows in the right-most join operand Hive should buffer before emitting the join result."), - HIVEJOINCACHESIZE("hive.join.cache.size", 25000, - "How many rows in the joining tables (except the streaming table) should be cached in memory."), - - // CBO related - HIVE_CBO_ENABLED("hive.cbo.enable", true, "Flag to control enabling Cost Based Optimizations using Calcite framework."), - HIVE_CBO_RETPATH_HIVEOP("hive.cbo.returnpath.hiveop", false, "Flag to control calcite plan to hive operator conversion"), - HIVE_CBO_EXTENDED_COST_MODEL("hive.cbo.costmodel.extended", false, "Flag to control enabling the extended cost model based on" - + "CPU, IO and cardinality. Otherwise, the cost model is based on cardinality."), - HIVE_CBO_COST_MODEL_CPU("hive.cbo.costmodel.cpu", "0.000001", "Default cost of a comparison"), - HIVE_CBO_COST_MODEL_NET("hive.cbo.costmodel.network", "150.0", "Default cost of a transfering a byte over network;" - + " expressed as multiple of CPU cost"), - HIVE_CBO_COST_MODEL_LFS_WRITE("hive.cbo.costmodel.local.fs.write", "4.0", "Default cost of writing a byte to local FS;" - + " expressed as multiple of NETWORK cost"), - HIVE_CBO_COST_MODEL_LFS_READ("hive.cbo.costmodel.local.fs.read", "4.0", "Default cost of reading a byte from local FS;" - + " expressed as multiple of NETWORK cost"), - HIVE_CBO_COST_MODEL_HDFS_WRITE("hive.cbo.costmodel.hdfs.write", "10.0", "Default cost of writing a byte to HDFS;" - + " expressed as multiple of Local FS write cost"), - HIVE_CBO_COST_MODEL_HDFS_READ("hive.cbo.costmodel.hdfs.read", "1.5", "Default cost of reading a byte from HDFS;" - + " expressed as multiple of Local FS read cost"), - AGGR_JOIN_TRANSPOSE("hive.transpose.aggr.join", false, "push aggregates through join"), - - // hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.row, - // need to remove by hive .13. 
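The CBO cost entries above are multipliers chained off one another, so the absolute per-byte costs implied by the defaults can be spelled out in CPU units (plain arithmetic taken from the descriptions; CboCosts is an illustrative class name):

    public class CboCosts {
      public static void main(String[] args) {
        double cpu = 0.000001;               // hive.cbo.costmodel.cpu: one comparison
        double net = 150.0 * cpu;            // network byte = 150 x CPU
        double lfsWrite = 4.0 * net;         // local FS write = 4 x network
        double lfsRead = 4.0 * net;          // local FS read = 4 x network
        double hdfsWrite = 10.0 * lfsWrite;  // HDFS write = 10 x local FS write
        double hdfsRead = 1.5 * lfsRead;     // HDFS read = 1.5 x local FS read
        System.out.println(hdfsWrite + " " + hdfsRead);
      }
    }
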
Also, do not change default (see SMB operator) - HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100, ""), - - HIVEMAPJOINUSEOPTIMIZEDTABLE("hive.mapjoin.optimized.hashtable", true, - "Whether Hive should use memory-optimized hash table for MapJoin. Only works on Tez,\n" + - "because memory-optimized hashtable cannot be serialized."), - HIVEMAPJOINOPTIMIZEDTABLEPROBEPERCENT("hive.mapjoin.optimized.hashtable.probe.percent", - (float) 0.5, "Probing space percentage of the optimized hashtable"), - HIVEUSEHYBRIDGRACEHASHJOIN("hive.mapjoin.hybridgrace.hashtable", true, "Whether to use hybrid" + - "grace hash join as the join method for mapjoin. Tez only."), - HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ("hive.mapjoin.hybridgrace.memcheckfrequency", 1024, "For " + - "hybrid grace hash join, how often (how many rows apart) we check if memory is full. " + - "This number should be power of 2."), - HIVEHYBRIDGRACEHASHJOINMINWBSIZE("hive.mapjoin.hybridgrace.minwbsize", 524288, "For hybrid grace" + - "Hash join, the minimum write buffer size used by optimized hashtable. Default is 512 KB."), - HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS("hive.mapjoin.hybridgrace.minnumpartitions", 16, "For" + - "Hybrid grace hash join, the minimum number of partitions to create."), - HIVEHASHTABLEWBSIZE("hive.mapjoin.optimized.hashtable.wbsize", 8 * 1024 * 1024, - "Optimized hashtable (see hive.mapjoin.optimized.hashtable) uses a chain of buffers to\n" + - "store data. This is one buffer size. HT may be slightly faster if this is larger, but for small\n" + - "joins unnecessary memory will be allocated and then trimmed."), - - HIVESMBJOINCACHEROWS("hive.smbjoin.cache.rows", 10000, - "How many rows with the same key value should be cached in memory per smb joined table."), - HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000, - "Number of rows after which size of the grouping keys/aggregation classes is performed"), - HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5, - "Portion of total memory to be used by map-side group aggregation hash table"), - HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3, - "Portion of total memory to be used by map-side group aggregation hash table, when this group by is followed by map join"), - HIVEMAPAGGRMEMORYTHRESHOLD("hive.map.aggr.hash.force.flush.memory.threshold", (float) 0.9, - "The max memory to be used by map-side group aggregation hash table.\n" + - "If the memory usage is higher than this number, force to flush data"), - HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5, - "Hash aggregation will be turned off if the ratio between hash table size and input rows is bigger than this number. \n" + - "Set to 1 to make sure hash aggregation is never turned off."), - HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true, - "Whether to optimize multi group by query to generate single M/R job plan. If the multi group by query has \n" + - "common group by keys, it will be optimized to generate single M/R job."), - HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", false, - "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" + - "the group by in the mapper by using BucketizedHiveInputFormat. 
-    HIVE_MAP_GROUPBY_SORT_TESTMODE("hive.map.groupby.sorted.testmode", false,
-        "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" +
-        "the group by in the mapper by using BucketizedHiveInputFormat. If the test mode is set, the plan\n" +
-        "is not converted, but a query property is set to denote the same."),
-    HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", false,
-        "Whether to enable using Column Position Alias in Group By or Order By"),
-    HIVE_NEW_JOB_GROUPING_SET_CARDINALITY("hive.new.job.grouping.set.cardinality", 30,
-        "Whether a new map-reduce job should be launched for grouping sets/rollups/cubes.\n" +
-        "For a query like: select a, b, c, count(1) from T group by a, b, c with rollup;\n" +
-        "4 rows are created per row: (a, b, c), (a, b, null), (a, null, null), (null, null, null).\n" +
-        "This can lead to an explosion across the map-reduce boundary if the cardinality of T is very high,\n" +
-        "and map-side aggregation does not do a very good job. \n" +
-        "\n" +
-        "This parameter decides if Hive should add an additional map-reduce job. If the grouping set\n" +
-        "cardinality (4 in the example above) is more than this value, a new MR job is added under the\n" +
-        "assumption that the original group by will reduce the data size."),
-
-    // Max filesize used to do a single copy (after that, distcp is used)
-    HIVE_EXEC_COPYFILE_MAXSIZE("hive.exec.copyfile.maxsize", 32L * 1024 * 1024 /*32M*/,
-        "Maximum file size (in bytes) that Hive uses to do single HDFS copies between directories. " +
-        "Distributed copies (distcp) will be used instead for bigger files so that copies can be done faster."),
-
-    // for hive udtf operator
-    HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false,
-        "Whether Hive should automatically send progress information to TaskTracker \n" +
-        "when using UDTFs to prevent the task getting killed because of inactivity. Users should be cautious \n" +
-        "because this may prevent TaskTracker from killing tasks with infinite loops."),
-
-    HIVEDEFAULTFILEFORMAT("hive.default.fileformat", "TextFile", new StringSet("TextFile", "SequenceFile", "RCfile", "ORC"),
-        "Default file format for CREATE TABLE statement. Users can explicitly override it by CREATE TABLE ... STORED AS [FORMAT]"),
-    HIVEDEFAULTMANAGEDFILEFORMAT("hive.default.fileformat.managed", "none",
-        new StringSet("none", "TextFile", "SequenceFile", "RCfile", "ORC"),
-        "Default file format for CREATE TABLE statement applied to managed tables only. External tables will be \n" +
-        "created with the format specified by hive.default.fileformat. Leaving this as 'none' will result in using hive.default.fileformat \n" +
-        "for all tables."),
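The grouping-set entry above trades one extra MR job against data explosion. A sketch of the decision it describes; groupingSetSize stands in for the cardinality the planner computes (4 in the ROLLUP example):

    // Sketch only: add a second map-reduce job when the grouping-set
    // cardinality exceeds hive.new.job.grouping.set.cardinality.
    static boolean addExtraMapReduceJob(HiveConf conf, int groupingSetSize) {
      return groupingSetSize > conf.getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY);
    }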
-    HIVEQUERYRESULTFILEFORMAT("hive.query.result.fileformat", "TextFile", new StringSet("TextFile", "SequenceFile", "RCfile"),
-        "Default file format for storing results of the query."),
-    HIVECHECKFILEFORMAT("hive.fileformat.check", true, "Whether to check the file format or not when loading data files"),
-
-    // default serde for rcfile
-    HIVEDEFAULTRCFILESERDE("hive.default.rcfile.serde",
-        "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe",
-        "The default SerDe Hive will use for the RCFile format"),
-
-    HIVEDEFAULTSERDE("hive.default.serde",
-        "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
-        "The default SerDe Hive will use for storage formats that do not specify a SerDe."),
-
-    SERDESUSINGMETASTOREFORSCHEMA("hive.serdes.using.metastore.for.schema",
-        "org.apache.hadoop.hive.ql.io.orc.OrcSerde," +
-        "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," +
-        "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe," +
-        "org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe," +
-        "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe," +
-        "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," +
-        "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe," +
-        "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe",
-        "SerDes retrieving schema from metastore. This is an internal parameter."),
-
-    HIVEHISTORYFILELOC("hive.querylog.location",
-        "${system:java.io.tmpdir}" + File.separator + "${system:user.name}",
-        "Location of Hive runtime structured log file"),
-
-    HIVE_LOG_INCREMENTAL_PLAN_PROGRESS("hive.querylog.enable.plan.progress", true,
-        "Whether to log the plan's progress every time a job's progress is checked.\n" +
-        "These logs are written to the location specified by hive.querylog.location"),
-
-    HIVE_LOG_INCREMENTAL_PLAN_PROGRESS_INTERVAL("hive.querylog.plan.progress.interval", "60000ms",
-        new TimeValidator(TimeUnit.MILLISECONDS),
-        "The interval to wait between logging the plan's progress.\n" +
-        "If there is a whole number percentage change in the progress of the mappers or the reducers,\n" +
-        "the progress is logged regardless of this value.\n" +
-        "The actual interval will be the ceiling of (this value divided by the value of\n" +
-        "hive.exec.counters.pull.interval) multiplied by the value of hive.exec.counters.pull.interval,\n" +
-        "i.e. if it does not divide evenly by the value of hive.exec.counters.pull.interval, it will be\n" +
-        "logged less frequently than specified.\n" +
-        "This only has an effect if hive.querylog.enable.plan.progress is set to true."),
-
-    HIVESCRIPTSERDE("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
-        "The default SerDe for transmitting input data to and reading output data from the user scripts. "),
-    HIVESCRIPTRECORDREADER("hive.script.recordreader",
-        "org.apache.hadoop.hive.ql.exec.TextRecordReader",
-        "The default record reader for reading data from the user scripts. "),
-    HIVESCRIPTRECORDWRITER("hive.script.recordwriter",
-        "org.apache.hadoop.hive.ql.exec.TextRecordWriter",
-        "The default record writer for writing data to the user scripts. "),
-    HIVESCRIPTESCAPE("hive.transform.escape.input", false,
-        "This adds an option to escape special chars (newlines, carriage returns and\n" +
-        "tabs) when they are passed to the user script. This is useful if the Hive tables\n" +
-        "can contain data that contains special characters."),
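The plan-progress interval above is quantized by the counter pull interval. A sketch of the ceiling arithmetic the description spells out; pullIntervalMs is an illustrative stand-in for hive.exec.counters.pull.interval (TimeUnit is already imported by this class):

    // Sketch only: effective interval = ceiling(requested / pull) * pull,
    // so a requested interval that does not divide evenly is rounded up
    // to the next counter-pull boundary.
    static long effectiveLogIntervalMs(HiveConf conf, long pullIntervalMs) {
      long requested = conf.getTimeVar(
          HiveConf.ConfVars.HIVE_LOG_INCREMENTAL_PLAN_PROGRESS_INTERVAL, TimeUnit.MILLISECONDS);
      return ((requested + pullIntervalMs - 1) / pullIntervalMs) * pullIntervalMs;
    }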
-    HIVEBINARYRECORDMAX("hive.binary.record.max.length", 1000,
-        "Read from a binary stream and treat each hive.binary.record.max.length bytes as a record. \n" +
-        "The last record before the end of stream can have fewer than hive.binary.record.max.length bytes"),
-
-    // HWI
-    HIVEHWILISTENHOST("hive.hwi.listen.host", "0.0.0.0", "This is the host address the Hive Web Interface will listen on"),
-    HIVEHWILISTENPORT("hive.hwi.listen.port", "9999", "This is the port the Hive Web Interface will listen on"),
-    HIVEHWIWARFILE("hive.hwi.war.file", "${env:HWI_WAR_FILE}",
-        "This sets the path to the HWI war file, relative to ${HIVE_HOME}. "),
-
-    HIVEHADOOPMAXMEM("hive.mapred.local.mem", 0, "mapper/reducer memory in local mode"),
-
-    //small table file size
-    HIVESMALLTABLESFILESIZE("hive.mapjoin.smalltable.filesize", 25000000L,
-        "The threshold for the input file size of the small tables; if the file size is smaller \n" +
-        "than this threshold, Hive will try to convert the common join into a map join"),
-
-    HIVESAMPLERANDOMNUM("hive.sample.seednumber", 0,
-        "A number used for percentage sampling. By changing this number, the user will change the subsets of data sampled."),
-
-    // test mode in hive mode
-    HIVETESTMODE("hive.test.mode", false,
-        "Whether Hive is running in test mode. If yes, it turns on sampling and prefixes the output tablename.",
-        false),
-    HIVETESTMODEPREFIX("hive.test.mode.prefix", "test_",
-        "In test mode, specifies the prefix for the output table", false),
-    HIVETESTMODESAMPLEFREQ("hive.test.mode.samplefreq", 32,
-        "In test mode, specifies the sampling frequency for tables that are not bucketed.\n" +
-        "For example, the following query:\n" +
-        "  INSERT OVERWRITE TABLE dest SELECT col1 from src\n" +
-        "would be converted to\n" +
-        "  INSERT OVERWRITE TABLE test_dest\n" +
-        "  SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1))", false),
-    HIVETESTMODENOSAMPLE("hive.test.mode.nosamplelist", "",
-        "In test mode, specifies a comma separated list of table names to which sampling is not applied", false),
-    HIVETESTMODEDUMMYSTATAGGR("hive.test.dummystats.aggregator", "", "internal variable for test", false),
-    HIVETESTMODEDUMMYSTATPUB("hive.test.dummystats.publisher", "", "internal variable for test", false),
-    HIVETESTCURRENTTIMESTAMP("hive.test.currenttimestamp", null, "current timestamp for test", false),
-
-    HIVEMERGEMAPFILES("hive.merge.mapfiles", true,
-        "Merge small files at the end of a map-only job"),
-    HIVEMERGEMAPREDFILES("hive.merge.mapredfiles", false,
-        "Merge small files at the end of a map-reduce job"),
-    HIVEMERGETEZFILES("hive.merge.tezfiles", false, "Merge small files at the end of a Tez DAG"),
-    HIVEMERGESPARKFILES("hive.merge.sparkfiles", false, "Merge small files at the end of a Spark DAG Transformation"),
-    HIVEMERGEMAPFILESSIZE("hive.merge.size.per.task", (long) (256 * 1000 * 1000),
-        "Size of merged files at the end of the job"),
-    HIVEMERGEMAPFILESAVGSIZE("hive.merge.smallfiles.avgsize", (long) (16 * 1000 * 1000),
-        "When the average output file size of a job is less than this number, Hive will start an additional \n" +
-        "map-reduce job to merge the output files into bigger files. This is only done for map-only jobs \n" +
-        "if hive.merge.mapfiles is true, and for map-reduce jobs if hive.merge.mapredfiles is true."),
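A sketch of the small-file merge decision the entry above describes; avgOutputFileSize stands in for the job's measured average output size:

    // Sketch only: launch the extra merge job for a map-only job when
    // merging is enabled and the average output file is below the threshold.
    static boolean launchMergeJob(HiveConf conf, long avgOutputFileSize) {
      return conf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES)
          && avgOutputFileSize < conf.getLongVar(HiveConf.ConfVars.HIVEMERGEMAPFILESAVGSIZE);
    }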
-    HIVEMERGERCFILEBLOCKLEVEL("hive.merge.rcfile.block.level", true, ""),
-    HIVEMERGEORCFILESTRIPELEVEL("hive.merge.orcfile.stripe.level", true,
-        "When hive.merge.mapfiles, hive.merge.mapredfiles or hive.merge.tezfiles is enabled\n" +
-        "while writing a table with ORC file format, enabling this config will do stripe-level\n" +
-        "fast merge for small ORC files. Note that enabling this config will not honor the\n" +
-        "padding tolerance config (hive.exec.orc.block.padding.tolerance)."),
-
-    HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true,
-        "If this is set the header for RCFiles will simply be RCF. If this is not\n" +
-        "set the header will be the one borrowed from sequence files, e.g. SEQ- followed\n" +
-        "by the input and output RCFile formats."),
-    HIVEUSERCFILESYNCCACHE("hive.exec.rcfile.use.sync.cache", true, ""),
-
-    HIVE_RCFILE_RECORD_INTERVAL("hive.io.rcfile.record.interval", Integer.MAX_VALUE, ""),
-    HIVE_RCFILE_COLUMN_NUMBER_CONF("hive.io.rcfile.column.number.conf", 0, ""),
-    HIVE_RCFILE_TOLERATE_CORRUPTIONS("hive.io.rcfile.tolerate.corruptions", false, ""),
-    HIVE_RCFILE_RECORD_BUFFER_SIZE("hive.io.rcfile.record.buffer.size", 4194304, ""), // 4M
-
-    PARQUET_MEMORY_POOL_RATIO("parquet.memory.pool.ratio", 0.5f,
-        "Maximum fraction of heap that can be used by Parquet file writers in one task.\n" +
-        "It is for avoiding OutOfMemory errors in tasks. Works with Parquet 1.6.0 and above.\n" +
-        "This config parameter is defined in Parquet, so that it does not start with 'hive.'."),
-    HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION("hive.parquet.timestamp.skip.conversion", true,
-        "The current Hive implementation of Parquet stores timestamps in UTC; this flag allows skipping the conversion \n" +
-        "when reading Parquet files written by other tools"),
-    HIVE_INT_TIMESTAMP_CONVERSION_IN_SECONDS("hive.int.timestamp.conversion.in.seconds", false,
-        "Boolean/tinyint/smallint/int/bigint values are interpreted as milliseconds during the timestamp conversion.\n" +
-        "Set this flag to true to interpret the value as seconds to be consistent with float/double." ),
-    HIVE_ORC_FILE_MEMORY_POOL("hive.exec.orc.memory.pool", 0.5f,
-        "Maximum fraction of heap that can be used by ORC file writers"),
-    HIVE_ORC_WRITE_FORMAT("hive.exec.orc.write.format", null,
-        "Define the version of the file to write. Possible values are 0.11 and 0.12.\n" +
-        "If this parameter is not defined, ORC will use the run length encoding (RLE)\n" +
-        "introduced in Hive 0.12. Any value other than 0.11 results in the 0.12 encoding."),
-    HIVE_ORC_DEFAULT_STRIPE_SIZE("hive.exec.orc.default.stripe.size",
-        64L * 1024 * 1024,
-        "Define the default ORC stripe size, in bytes."),
-    HIVE_ORC_DEFAULT_BLOCK_SIZE("hive.exec.orc.default.block.size", 256L * 1024 * 1024,
-        "Define the default file system block size for ORC files."),
-
-    HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD("hive.exec.orc.dictionary.key.size.threshold", 0.8f,
-        "If the number of keys in a dictionary is greater than this fraction of the total number of\n" +
-        "non-null rows, turn off dictionary encoding. Use 1 to always use dictionary encoding."),
-    HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE("hive.exec.orc.default.row.index.stride", 10000,
-        "Define the default ORC index stride, in number of rows. (Stride is the number of rows\n" +
-        "an index entry represents.)"),
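The ORC dictionary threshold above is a ratio test. A sketch, with distinctKeys and nonNullRows as illustrative stand-ins for the writer's counts:

    // Sketch only: turn off dictionary encoding when the distinct keys
    // exceed the configured fraction of non-null rows.
    static boolean disableDictionaryEncoding(HiveConf conf, long distinctKeys, long nonNullRows) {
      float threshold = conf.getFloatVar(HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD);
      return distinctKeys > threshold * nonNullRows;
    }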
-    HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK("hive.orc.row.index.stride.dictionary.check", true,
-        "If enabled, the dictionary check will happen after the first row index stride (default 10000 rows);\n" +
-        "otherwise the dictionary check will happen before writing the first stripe. In both cases, the decision\n" +
-        "to use a dictionary or not will be retained thereafter."),
-    HIVE_ORC_DEFAULT_BUFFER_SIZE("hive.exec.orc.default.buffer.size", 256 * 1024,
-        "Define the default ORC buffer size, in bytes."),
-    HIVE_ORC_DEFAULT_BLOCK_PADDING("hive.exec.orc.default.block.padding", true,
-        "Define the default block padding, which pads stripes to the HDFS block boundaries."),
-    HIVE_ORC_BLOCK_PADDING_TOLERANCE("hive.exec.orc.block.padding.tolerance", 0.05f,
-        "Define the tolerance for block padding as a decimal fraction of stripe size (for\n" +
-        "example, the default value 0.05 is 5% of the stripe size). For the defaults of 64MB\n" +
-        "ORC stripes and 256MB HDFS blocks, the default block padding tolerance of 5% will\n" +
-        "reserve a maximum of 3.2MB for padding within the 256MB block. In that case, if the\n" +
-        "available size within the block is more than 3.2MB, a new smaller stripe will be\n" +
-        "inserted to fit within that space. This will make sure that no stripe written will\n" +
-        "cross block boundaries and cause remote reads within a node local task."),
-    HIVE_ORC_DEFAULT_COMPRESS("hive.exec.orc.default.compress", "ZLIB", "Define the default compression codec for ORC files"),
-
-    HIVE_ORC_ENCODING_STRATEGY("hive.exec.orc.encoding.strategy", "SPEED", new StringSet("SPEED", "COMPRESSION"),
-        "Define the encoding strategy to use while writing data. Changing this will\n" +
-        "only affect the light weight encoding for integers. This flag will not\n" +
-        "change the compression level of higher level compression codecs (like ZLIB)."),
-
-    HIVE_ORC_COMPRESSION_STRATEGY("hive.exec.orc.compression.strategy", "SPEED", new StringSet("SPEED", "COMPRESSION"),
-        "Define the compression strategy to use while writing data. \n" +
-        "This changes the compression level of higher level compression codecs (like ZLIB)."),
-
-    HIVE_ORC_SPLIT_STRATEGY("hive.exec.orc.split.strategy", "HYBRID", new StringSet("HYBRID", "BI", "ETL"),
-        "This is not a user level config. The BI strategy is used when the requirement is to spend less time in split generation" +
-        " as opposed to query execution (split generation does not read or cache file footers)." +
-        " The ETL strategy is used when spending a little more time in split generation is acceptable" +
-        " (split generation reads and caches file footers). HYBRID chooses between the above strategies" +
-        " based on heuristics."),
-
-    HIVE_ORC_MS_FOOTER_CACHE_ENABLED("hive.orc.splits.ms.footer.cache.enabled", false,
-        "Whether to enable using the file metadata cache in the metastore for ORC file footers."),
-
-    HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS("hive.orc.splits.include.file.footer", false,
-        "If turned on, splits generated by ORC will include metadata about the stripes in the file. This\n" +
-        "data is read remotely (from the client or HS2 machine) and sent to all the tasks."),
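The padding-tolerance entry above implies a simple byte budget. A sketch of that arithmetic; with the defaults (0.05 and a 64MB stripe) it evaluates to roughly 3.2MB:

    // Sketch only: the maximum bytes the writer may spend on padding,
    // expressed as a fraction of the default stripe size.
    static long maxBlockPaddingBytes(HiveConf conf) {
      float tolerance = conf.getFloatVar(HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE);
      long stripeSize = conf.getLongVar(HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE);
      return (long) (tolerance * stripeSize);
    }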
-    HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS("hive.orc.splits.include.fileid", true,
-        "Include file ID in splits on file systems that support it."),
-    HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE("hive.orc.cache.stripe.details.size", 10000,
-        "Max cache size for keeping meta info about ORC splits cached in the client."),
-    HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS("hive.orc.compute.splits.num.threads", 10,
-        "How many threads ORC should use to create splits in parallel."),
-    HIVE_ORC_SKIP_CORRUPT_DATA("hive.exec.orc.skip.corrupt.data", false,
-        "If the ORC reader encounters corrupt data, this value will be used to determine\n" +
-        "whether to skip the corrupt data or throw an exception. The default behavior is to throw an exception."),
-
-    HIVE_ORC_ZEROCOPY("hive.exec.orc.zerocopy", false,
-        "Use zerocopy reads with ORC. (This requires Hadoop 2.3 or later.)"),
-
-    HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL("hive.lazysimple.extended_boolean_literal", false,
-        "LazySimpleSerde uses this property to determine if it treats 'T', 't', 'F', 'f',\n" +
-        "'1', and '0' as extended, legal boolean literals, in addition to 'TRUE' and 'FALSE'.\n" +
-        "The default is false, which means only 'TRUE' and 'FALSE' are treated as legal\n" +
-        "boolean literals."),
-
-    HIVESKEWJOIN("hive.optimize.skewjoin", false,
-        "Whether to enable skew join optimization. \n" +
-        "The algorithm is as follows: At runtime, detect the keys with a large skew. Instead of\n" +
-        "processing those keys, store them temporarily in an HDFS directory. In a follow-up map-reduce\n" +
-        "job, process those skewed keys. The same key need not be skewed for all the tables, and so,\n" +
-        "the follow-up map-reduce job (for the skewed keys) would be much faster, since it would be a\n" +
-        "map-join."),
-    HIVEDYNAMICPARTITIONHASHJOIN("hive.optimize.dynamic.partition.hashjoin", false,
-        "Whether to enable dynamically partitioned hash join optimization. \n" +
-        "This setting is also dependent on enabling hive.auto.convert.join"),
-    HIVECONVERTJOIN("hive.auto.convert.join", true,
-        "Whether Hive enables the optimization of converting a common join into a mapjoin based on the input file size"),
-    HIVECONVERTJOINNOCONDITIONALTASK("hive.auto.convert.join.noconditionaltask", true,
-        "Whether Hive enables the optimization of converting a common join into a mapjoin based on the input file size. \n" +
-        "If this parameter is on, and the sum of sizes for n-1 of the tables/partitions for an n-way join is smaller than the\n" +
-        "specified size, the join is directly converted to a mapjoin (there is no conditional task)."),
-
-    HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD("hive.auto.convert.join.noconditionaltask.size",
-        10000000L,
-        "If hive.auto.convert.join.noconditionaltask is off, this parameter does not take effect. \n" +
-        "However, if it is on, and the sum of sizes for n-1 of the tables/partitions for an n-way join is smaller than this size, \n" +
-        "the join is directly converted to a mapjoin (there is no conditional task). The default is 10MB."),
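A sketch of the n-way join size test described above; smallerTableSizes stands in for the input sizes of the n-1 smaller tables:

    // Sketch only: convert directly to a mapjoin (no conditional task)
    // when the smaller tables together fit under the threshold.
    static boolean convertToMapJoin(HiveConf conf, long[] smallerTableSizes) {
      long threshold = conf.getLongVar(HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
      long total = 0L;
      for (long size : smallerTableSizes) {
        total += size;
      }
      return total < threshold;
    }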
-    HIVECONVERTJOINUSENONSTAGED("hive.auto.convert.join.use.nonstaged", false,
-        "For conditional joins, if the input stream from a small alias can be directly applied to the join operator without \n" +
-        "filtering or projection, the alias need not be pre-staged in the distributed cache via a mapred local task.\n" +
-        "Currently, this does not work with vectorization or the Tez execution engine."),
-    HIVESKEWJOINKEY("hive.skewjoin.key", 100000,
-        "Determine if we get a skew key in join. If we see more than the specified number of rows with the same key in the join operator,\n" +
-        "we consider the key a skew join key. "),
-    HIVESKEWJOINMAPJOINNUMMAPTASK("hive.skewjoin.mapjoin.map.tasks", 10000,
-        "Determine the number of map tasks used in the follow-up map join job for a skew join.\n" +
-        "It should be used together with hive.skewjoin.mapjoin.min.split to perform fine-grained control."),
-    HIVESKEWJOINMAPJOINMINSPLIT("hive.skewjoin.mapjoin.min.split", 33554432L,
-        "Determine the maximum number of map tasks used in the follow-up map join job for a skew join by specifying \n" +
-        "the minimum split size. It should be used together with hive.skewjoin.mapjoin.map.tasks to perform fine-grained control."),
-
-    HIVESENDHEARTBEAT("hive.heartbeat.interval", 1000,
-        "Send a heartbeat after this interval - used by mapjoin and filter operators"),
-    HIVELIMITMAXROWSIZE("hive.limit.row.max.size", 100000L,
-        "When trying a smaller subset of data for simple LIMIT, the size we need to guarantee each row to have at least."),
-    HIVELIMITOPTLIMITFILE("hive.limit.optimize.limit.file", 10,
-        "When trying a smaller subset of data for simple LIMIT, the maximum number of files we can sample."),
-    HIVELIMITOPTENABLE("hive.limit.optimize.enable", false,
-        "Whether to enable the optimization of trying a smaller subset of data for simple LIMIT first."),
-    HIVELIMITOPTMAXFETCH("hive.limit.optimize.fetch.max", 50000,
-        "Maximum number of rows allowed for a smaller subset of data for simple LIMIT, if it is a fetch query. \n" +
-        "Insert queries are not restricted by this limit."),
-    HIVELIMITPUSHDOWNMEMORYUSAGE("hive.limit.pushdown.memory.usage", -1f,
-        "The max memory to be used for hash in the RS operator for top-K selection."),
-    HIVELIMITTABLESCANPARTITION("hive.limit.query.max.table.partition", -1,
-        "This controls how many partitions can be scanned for each partitioned table.\n" +
-        "The default value \"-1\" means no limit."),
-
-    HIVEHASHTABLEKEYCOUNTADJUSTMENT("hive.hashtable.key.count.adjustment", 1.0f,
-        "Adjustment to mapjoin hashtable size derived from table and column statistics; the estimate" +
-        " of the number of keys is divided by this value. If the value is 0, statistics are not used" +
-        " and hive.hashtable.initialCapacity is used instead."),
-    HIVEHASHTABLETHRESHOLD("hive.hashtable.initialCapacity", 100000, "Initial capacity of the " +
-        "mapjoin hashtable if statistics are absent, or if hive.hashtable.key.count.adjustment is set to 0"),
-    HIVEHASHTABLELOADFACTOR("hive.hashtable.loadfactor", (float) 0.75, ""),
-    HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE("hive.mapjoin.followby.gby.localtask.max.memory.usage", (float) 0.55,
-        "This number means how much memory the local task can take to hold the key/value pairs in an in-memory hash table \n" +
-        "when this map join is followed by a group by. If the local task's memory usage is more than this number, \n" +
-        "the local task will abort by itself. It means the data of the small table is too large to be held in memory."),
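The two hashtable entries above combine into one sizing rule. A sketch; statsKeyEstimate stands in for the key count derived from column statistics:

    // Sketch only: divide the statistics-based key estimate by the
    // adjustment, falling back to the fixed initial capacity when the
    // adjustment is 0 (i.e. statistics are not used).
    static int mapJoinHashTableCapacity(HiveConf conf, long statsKeyEstimate) {
      float adjustment = conf.getFloatVar(HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT);
      if (adjustment == 0f) {
        return conf.getIntVar(HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD);
      }
      return (int) (statsKeyEstimate / adjustment);
    }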
-    HIVEHASHTABLEMAXMEMORYUSAGE("hive.mapjoin.localtask.max.memory.usage", (float) 0.90,
-        "This number means how much memory the local task can take to hold the key/value pairs in an in-memory hash table. \n" +
-        "If the local task's memory usage is more than this number, the local task will abort by itself. \n" +
-        "It means the data of the small table is too large to be held in memory."),
-    HIVEHASHTABLESCALE("hive.mapjoin.check.memory.rows", (long) 100000,
-        "The number of rows processed after which memory usage is checked"),
-
-    HIVEDEBUGLOCALTASK("hive.debug.localtask", false, ""),
-
-    HIVEINPUTFORMAT("hive.input.format", "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat",
-        "The default input format. Set this to HiveInputFormat if you encounter problems with CombineHiveInputFormat."),
-    HIVETEZINPUTFORMAT("hive.tez.input.format", "org.apache.hadoop.hive.ql.io.HiveInputFormat",
-        "The default input format for Tez. Tez groups splits in the AM."),
-
-    HIVETEZCONTAINERSIZE("hive.tez.container.size", -1,
-        "By default Tez will spawn containers of the size of a mapper. This can be used to override that."),
-    HIVETEZCPUVCORES("hive.tez.cpu.vcores", -1,
-        "By default Tez will ask for however many cpus map-reduce is configured to use per container.\n" +
-        "This can be used to override that."),
-    HIVETEZJAVAOPTS("hive.tez.java.opts", null,
-        "By default Tez will use the Java options from map tasks. This can be used to override that."),
-    HIVETEZLOGLEVEL("hive.tez.log.level", "INFO",
-        "The log level to use for tasks executing as part of the DAG.\n" +
-        "Used only if hive.tez.java.opts is used to configure Java options."),
-
-    HIVEENFORCEBUCKETING("hive.enforce.bucketing", false,
-        "Whether bucketing is enforced. If true, while inserting into the table, bucketing is enforced."),
-    HIVEENFORCESORTING("hive.enforce.sorting", false,
-        "Whether sorting is enforced. If true, while inserting into the table, sorting is enforced."),
-    HIVEOPTIMIZEBUCKETINGSORTING("hive.optimize.bucketingsorting", true,
-        "If hive.enforce.bucketing or hive.enforce.sorting is true, don't create a reducer for enforcing \n" +
-        "bucketing/sorting for queries of the form: \n" +
-        "insert overwrite table T2 select * from T1;\n" +
-        "where T1 and T2 are bucketed/sorted by the same keys into the same number of buckets."),
-    HIVEPARTITIONER("hive.mapred.partitioner", "org.apache.hadoop.hive.ql.io.DefaultHivePartitioner", ""),
-    HIVEENFORCESORTMERGEBUCKETMAPJOIN("hive.enforce.sortmergebucketmapjoin", false,
-        "If the user asked for a sort-merge bucketed map-side join, and it cannot be performed, should the query fail or not?"),
-    HIVEENFORCEBUCKETMAPJOIN("hive.enforce.bucketmapjoin", false,
-        "If the user asked for a bucketed map-side join, and it cannot be performed, \n" +
-        "should the query fail or not? For example, if the buckets in the tables being joined are\n" +
-        "not a multiple of each other, a bucketed map-side join cannot be performed, and the\n" +
-        "query will fail if hive.enforce.bucketmapjoin is set to true."),
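A sketch of overriding the Tez defaults described above; the values are illustrative only, and the assumption that the container size is in MB follows Tez convention rather than anything stated here:

    // Sketch only: override the container size, vcores and task JVM opts
    // that Tez would otherwise inherit from the map-task configuration.
    static void overrideTezResources(HiveConf conf) {
      conf.setIntVar(HiveConf.ConfVars.HIVETEZCONTAINERSIZE, 4096); // illustrative value
      conf.setIntVar(HiveConf.ConfVars.HIVETEZCPUVCORES, 2);        // illustrative value
      conf.setVar(HiveConf.ConfVars.HIVETEZJAVAOPTS, "-Xmx3276m");  // illustrative value
    }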
-    HIVE_AUTO_SORTMERGE_JOIN("hive.auto.convert.sortmerge.join", false,
-        "Whether the join will be automatically converted to a sort-merge join, if the joined tables pass the criteria for sort-merge join."),
-    HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR(
-        "hive.auto.convert.sortmerge.join.bigtable.selection.policy",
-        "org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ",
-        "The policy to choose the big table for automatic conversion to a sort-merge join. \n" +
-        "By default, the table with the largest partitions is selected as the big table. All policies are:\n" +
-        ". based on position of the table - the leftmost table is selected\n" +
-        "org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ.\n" +
-        ". based on total size (all the partitions selected in the query) of the table \n" +
-        "org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoS
<TRUNCATED>