Github user kmanamcheri commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22614#discussion_r223473324

    --- Diff: sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala ---
    @@ -746,34 +746,45 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
           getAllPartitionsMethod.invoke(hive, table).asInstanceOf[JSet[Partition]]
         } else {
           logDebug(s"Hive metastore filter is '$filter'.")
    +      val shouldFallback = SQLConf.get.metastorePartitionPruningFallback
           val tryDirectSqlConfVar = HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL
           // We should get this config value from the metaStore. otherwise hit SPARK-18681.
           // To be compatible with hive-0.12 and hive-0.13, In the future we can achieve this by:
           // val tryDirectSql = hive.getMetaConf(tryDirectSqlConfVar.varname).toBoolean
           val tryDirectSql = hive.getMSC.getConfigValue(tryDirectSqlConfVar.varname,
             tryDirectSqlConfVar.defaultBoolVal.toString).toBoolean
           try {
    -        // Hive may throw an exception when calling this method in some circumstances, such as
    -        // when filtering on a non-string partition column when the hive config key
    -        // hive.metastore.try.direct.sql is false
             getPartitionsByFilterMethod.invoke(hive, table, filter)
               .asInstanceOf[JArrayList[Partition]]
           } catch {
    -        case ex: InvocationTargetException if ex.getCause.isInstanceOf[MetaException] &&
    -            !tryDirectSql =>
    -          logWarning("Caught Hive MetaException attempting to get partition metadata by " +
    -            "filter from Hive. Falling back to fetching all partition metadata, which will " +
    -            "degrade performance. Modifying your Hive metastore configuration to set " +
    -            s"${tryDirectSqlConfVar.varname} to true may resolve this problem.", ex)
    -          // HiveShim clients are expected to handle a superset of the requested partitions
    -          getAllPartitionsMethod.invoke(hive, table).asInstanceOf[JSet[Partition]]
    -        case ex: InvocationTargetException if ex.getCause.isInstanceOf[MetaException] &&
    -            tryDirectSql =>
    -          throw new RuntimeException("Caught Hive MetaException attempting to get partition " +
    -            "metadata by filter from Hive. You can set the Spark configuration setting " +
    -            s"${SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key} to false to work around this " +
    -            "problem, however this will result in degraded performance. Please report a bug: " +
    -            "https://issues.apache.org/jira/browse/SPARK", ex)
    +        case ex: InvocationTargetException if ex.getCause.isInstanceOf[MetaException] =>
    +          if (shouldFallback) {
    +            if (!tryDirectSql) {
    --- End diff --

    Good idea. Done.
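
    For context, a minimal sketch (not the actual Shim_v0_13 code) of the fallback
    control flow this diff is converging on. The stand-in types and the helper
    parameters fetchByFilter / fetchAll are hypothetical; the real shim invokes the
    Hive client through reflection and unwraps InvocationTargetException.

    // Sketch only: local stand-ins so the control flow compiles on its own.
    class MetaException(msg: String) extends Exception(msg)
    case class Partition(name: String)

    object PartitionPruningFallbackSketch {
      def getPartitionsByFilter(
          filter: String,
          shouldFallback: Boolean,   // the diff reads SQLConf.get.metastorePartitionPruningFallback
          tryDirectSql: Boolean,     // hive.metastore.try.direct.sql, read from the metastore
          fetchByFilter: String => Seq[Partition],
          fetchAll: () => Seq[Partition]): Seq[Partition] = {
        try {
          // Push the partition predicate down to the metastore.
          fetchByFilter(filter)
        } catch {
          case _: MetaException if shouldFallback && !tryDirectSql =>
            // Pushdown can legitimately fail here (e.g. filtering on a non-string
            // partition column with direct SQL disabled); degrade to fetching all
            // partitions and let the caller filter client-side.
            fetchAll()
          case ex: MetaException =>
            // Fallback disabled, or direct SQL was on so the failure is unexpected:
            // surface the error rather than silently scanning every partition.
            throw new RuntimeException("Failed to prune partitions by filter from Hive", ex)
        }
      }
    }

    Whether the non-fallback branch should behave differently when tryDirectSql is
    true versus false is exactly the behavior under discussion in this review thread.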