This is an automated email from the ASF dual-hosted git repository.

yumwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 50b36d13132 [SPARK-44454][SQL][HIVE] HiveShim getTablesByType support 
fallback
50b36d13132 is described below

commit 50b36d131326d1dc2503f20a35891e7f1b4b0194
Author: sychen <syc...@ctrip.com>
AuthorDate: Thu Jul 27 18:23:31 2023 +0800

    [SPARK-44454][SQL][HIVE] HiveShim getTablesByType support fallback
    
    ### What changes were proposed in this pull request?
    When the `Shim_v2_3#getTablesByType` call fails because the metastore does 
not provide the `get_tables_by_type` method, throw 
`SparkUnsupportedOperationException`.
    `HiveClientImpl#listTablesByType` will then fall back to the legacy code path.
    
    ### Why are the changes needed?
    When we use a newer Hive client to communicate with an older Hive 
metastore, we may encounter `Invalid method name: 'get_tables_by_type'`.
    
    ```java
    23/07/17 12:45:24,391 [main] DEBUG SparkSqlParser: Parsing command: show 
views
    23/07/17 12:45:24,489 [main] ERROR log: Got exception: 
org.apache.thrift.TApplicationException Invalid method name: 
'get_tables_by_type'
    org.apache.thrift.TApplicationException: Invalid method name: 
'get_tables_by_type'
        at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:79)
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_get_tables_by_type(ThriftHiveMetastore.java:1433)
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.get_tables_by_type(ThriftHiveMetastore.java:1418)
        at 
org.apache.hadoop.hive.metastore.HiveMetaStoreClient.getTables(HiveMetaStoreClient.java:1411)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at 
org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:173)
        at com.sun.proxy.$Proxy23.getTables(Unknown Source)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at 
org.apache.hadoop.hive.metastore.HiveMetaStoreClient$SynchronizedHandler.invoke(HiveMetaStoreClient.java:2344)
        at com.sun.proxy.$Proxy23.getTables(Unknown Source)
        at 
org.apache.hadoop.hive.ql.metadata.Hive.getTablesByType(Hive.java:1427)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at 
org.apache.spark.sql.hive.client.Shim_v2_3.getTablesByType(HiveShim.scala:1408)
        at 
org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$listTablesByType$1(HiveClientImpl.scala:789)
        at 
org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:294)
        at 
org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:225)
        at 
org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:224)
        at 
org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:274)
        at 
org.apache.spark.sql.hive.client.HiveClientImpl.listTablesByType(HiveClientImpl.scala:785)
        at 
org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$listViews$1(HiveExternalCatalog.scala:895)
        at 
org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:108)
        at 
org.apache.spark.sql.hive.HiveExternalCatalog.listViews(HiveExternalCatalog.scala:893)
        at 
org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.listViews(ExternalCatalogWithListener.scala:158)
        at 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.listViews(SessionCatalog.scala:1040)
        at 
org.apache.spark.sql.execution.command.ShowViewsCommand.$anonfun$run$5(views.scala:407)
        at scala.Option.getOrElse(Option.scala:189)
        at 
org.apache.spark.sql.execution.command.ShowViewsCommand.run(views.scala:407)
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Tested by using the built-in Hive 2.3.9 client to communicate with a Hive 
metastore whose version is lower than 2.3.
    
    Closes #42033 from cxzl25/SPARK-44454.
    
    Lead-authored-by: sychen <syc...@ctrip.com>
    Co-authored-by: cxzl25 <cxz...@users.noreply.github.com>
    Signed-off-by: Yuming Wang <yumw...@ebay.com>
---
 .../org/apache/spark/sql/hive/client/HiveShim.scala | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
index 63f672b22ba..338498d3d48 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
@@ -33,7 +33,7 @@ import org.apache.hadoop.hive.metastore.TableType
 import org.apache.hadoop.hive.metastore.api.{Database, EnvironmentContext, 
Function => HiveFunction, FunctionType, Index, MetaException, PrincipalType, 
ResourceType, ResourceUri}
 import org.apache.hadoop.hive.ql.Driver
 import org.apache.hadoop.hive.ql.io.AcidUtils
-import org.apache.hadoop.hive.ql.metadata.{Hive, Partition, Table}
+import org.apache.hadoop.hive.ql.metadata.{Hive, HiveException, Partition, 
Table}
 import org.apache.hadoop.hive.ql.plan.AddPartitionDesc
 import org.apache.hadoop.hive.ql.processors.{CommandProcessor, 
CommandProcessorFactory}
 import org.apache.hadoop.hive.ql.session.SessionState
@@ -1634,8 +1634,23 @@ private[client] class Shim_v2_3 extends Shim_v2_1 {
       pattern: String,
       tableType: TableType): Seq[String] = {
     recordHiveCall()
-    getTablesByTypeMethod.invoke(hive, dbName, pattern, tableType)
-      .asInstanceOf[JList[String]].asScala.toSeq
+    try {
+      getTablesByTypeMethod.invoke(hive, dbName, pattern, tableType)
+        .asInstanceOf[JList[String]].asScala.toSeq
+    } catch {
+      case ex: InvocationTargetException if 
ex.getCause.isInstanceOf[HiveException] =>
+        val cause = ex.getCause.getCause
+        if (cause != null && cause.isInstanceOf[MetaException] &&
+          cause.getMessage != null &&
+          cause.getMessage.contains("Invalid method name: 
'get_tables_by_type'")) {
+          // SparkUnsupportedOperationException (inherited from 
UnsupportedOperationException)
+          // is thrown when the Shim_v2_3#getTablesByType call returns no 
get_tables_by_type method.
+          // HiveClientImpl#listTablesByType will have fallback processing.
+          throw 
QueryExecutionErrors.getTablesByTypeUnsupportedByHiveVersionError()
+        } else {
+          throw ex
+        }
+    }
   }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to