Github user mallman commented on a diff in the pull request: https://github.com/apache/spark/pull/22614#discussion_r223422030 --- Diff: sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala --- @@ -746,34 +746,45 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { getAllPartitionsMethod.invoke(hive, table).asInstanceOf[JSet[Partition]] } else { logDebug(s"Hive metastore filter is '$filter'.") + val shouldFallback = SQLConf.get.metastorePartitionPruningFallback val tryDirectSqlConfVar = HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL // We should get this config value from the metaStore. otherwise hit SPARK-18681. // To be compatible with hive-0.12 and hive-0.13, In the future we can achieve this by: // val tryDirectSql = hive.getMetaConf(tryDirectSqlConfVar.varname).toBoolean val tryDirectSql = hive.getMSC.getConfigValue(tryDirectSqlConfVar.varname, tryDirectSqlConfVar.defaultBoolVal.toString).toBoolean try { - // Hive may throw an exception when calling this method in some circumstances, such as - // when filtering on a non-string partition column when the hive config key - // hive.metastore.try.direct.sql is false getPartitionsByFilterMethod.invoke(hive, table, filter) .asInstanceOf[JArrayList[Partition]] } catch { - case ex: InvocationTargetException if ex.getCause.isInstanceOf[MetaException] && - !tryDirectSql => - logWarning("Caught Hive MetaException attempting to get partition metadata by " + - "filter from Hive. Falling back to fetching all partition metadata, which will " + - "degrade performance. 
Modifying your Hive metastore configuration to set " + - s"${tryDirectSqlConfVar.varname} to true may resolve this problem.", ex) - // HiveShim clients are expected to handle a superset of the requested partitions - getAllPartitionsMethod.invoke(hive, table).asInstanceOf[JSet[Partition]] - case ex: InvocationTargetException if ex.getCause.isInstanceOf[MetaException] && - tryDirectSql => - throw new RuntimeException("Caught Hive MetaException attempting to get partition " + - "metadata by filter from Hive. You can set the Spark configuration setting " + - s"${SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key} to false to work around this " + - "problem, however this will result in degraded performance. Please report a bug: " + - "https://issues.apache.org/jira/browse/SPARK", ex) + case ex: InvocationTargetException if ex.getCause.isInstanceOf[MetaException] => + if (shouldFallback) { + if (!tryDirectSql) { + logWarning("Caught Hive MetaException attempting to get partition metadata by " + + "filter from Hive. Falling back to fetching all partition metadata, which will " + + "degrade performance. Modifying your Hive metastore configuration to set " + + s"${tryDirectSqlConfVar.varname} to true may resolve this problem.") + } else { + logWarning("Caught Hive MetaException attempting to get partition metadata " + + "by filter from Hive. Hive metastore's direct SQL feature has been enabled, " + + "but it is an optimistic optimization and not guaranteed to work. Falling back " + + "to fetching all partition metadata, which will degrade performance (for the " + + "current query). If you see this error consistently, you can set the Spark " + + s"configuration setting ${SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key} to " + + "false as a work around, however this will result in degraded performance. 
" + + "Please report a bug to Hive stating that direct SQL is failing consistently " + + "for the specified query: https://issues.apache.org/jira/browse/HIVE") --- End diff -- I think we should remove the suggestion to file a Hive project bug. Even with the direct SQL configuration setting enabled, there are valid metastore deployments for which it will be ignored. For example, my understanding is that if the metastore uses MongoDB for its underlying storage, the direct SQL configuration setting will be ignored. That means a failure here is not necessarily a Hive bug with direct SQL.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org