pan3793 commented on code in PR #50022:
URL: https://github.com/apache/spark/pull/50022#discussion_r1967508152


##########
sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala:
##########
@@ -1407,13 +1408,74 @@ private[hive] object HiveClientImpl extends Logging {
       case _ =>
         new HiveConf(conf, classOf[HiveConf])
     }
-    try {
+    val hive = try {
       Hive.getWithoutRegisterFns(hiveConf)
     } catch {
       // SPARK-37069: not all Hive versions have the above method (e.g., Hive 
2.3.9 has it but
-      // 2.3.8 don't), therefore here we fallback when encountering the 
exception.
+      // 2.3.8 doesn't), therefore here we fallback when encountering the 
exception.
       case _: NoSuchMethodError =>
         Hive.get(hiveConf)
     }
+
+    // Follow behavior of HIVE-26633 (4.0.0), only apply the max message size 
when
+    // `hive.thrift.client.max.message.size` is set and the value is positive
+    Option(hiveConf.get("hive.thrift.client.max.message.size"))
+      .map(HiveConf.toSizeBytes(_).toInt).filter(_ > 0)
+      .foreach { maxMessageSize =>
+        logDebug(s"Trying to set metastore client thrift max message to 
$maxMessageSize")
+        configureMaxThriftMessageSize(hiveConf, hive.getMSC, maxMessageSize)
+      }
+
+    hive
+  }
+
+  private def getFieldValue[T](obj: Any, fieldName: String): T = {
+    val field = obj.getClass.getDeclaredField(fieldName)
+    field.setAccessible(true)
+    field.get(obj).asInstanceOf[T]
+  }
+
+  private def findMethod(klass: Class[_], name: String, args: Class[_]*): 
Method = {
+    val method = klass.getDeclaredMethod(name, args: _*)
+    method.setAccessible(true)
+    method
+  }
+
+  // SPARK-49489: a surgery for Hive 2.3.10 due to lack of HIVE-26633
+  private def configureMaxThriftMessageSize(
+      hiveConf: HiveConf, msClient: IMetaStoreClient, maxMessageSize: Int): 
Unit = try {
+    msClient match {
+      // Hive uses Java Dynamic Proxy to enhance the MetaStoreClient to 
support synchronization
+      // and retrying, we should unwrap and access the real MetaStoreClient 
instance firstly
+      case proxy if JdkProxy.isProxyClass(proxy.getClass) =>
+        JdkProxy.getInvocationHandler(proxy) match {
+          case syncHandler if 
syncHandler.getClass.getName.endsWith("SynchronizedHandler") =>
+            val realMsc = getFieldValue[IMetaStoreClient](syncHandler, 
"client")
+            configureMaxThriftMessageSize(hiveConf, realMsc, maxMessageSize)
+          case retryHandler: RetryingMetaStoreClient =>
+            val realMsc = getFieldValue[IMetaStoreClient](retryHandler, "base")
+            configureMaxThriftMessageSize(hiveConf, realMsc, maxMessageSize)
+          case _ =>
+        }
+      case msc: HiveMetaStoreClient if !msc.isLocalMetaStore =>
+        msc.getTTransport match {
+          case t: TEndpointTransport =>
+            val currentMaxMessageSize = t.getConfiguration.getMaxMessageSize
+            if (currentMaxMessageSize != maxMessageSize) {
+              logDebug("Change the current metastore client thrift max message 
size from " +

Review Comment:
   @Madhukar525722 could you please try the latest patch, and monitor if this 
log is printed?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to