Github user liancheng commented on a diff in the pull request: https://github.com/apache/spark/pull/3640#discussion_r21512701 --- Diff: sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala --- @@ -54,47 +54,95 @@ private[hive] abstract class HiveFunctionRegistry val functionClassName = functionInfo.getFunctionClass.getName if (classOf[UDF].isAssignableFrom(functionInfo.getFunctionClass)) { - HiveSimpleUdf(functionClassName, children) + HiveSimpleUdf(new HiveFunctionCache(functionClassName), children) } else if (classOf[GenericUDF].isAssignableFrom(functionInfo.getFunctionClass)) { - HiveGenericUdf(functionClassName, children) + HiveGenericUdf(new HiveFunctionCache(functionClassName), children) } else if ( classOf[AbstractGenericUDAFResolver].isAssignableFrom(functionInfo.getFunctionClass)) { - HiveGenericUdaf(functionClassName, children) + HiveGenericUdaf(new HiveFunctionCache(functionClassName), children) } else if (classOf[UDAF].isAssignableFrom(functionInfo.getFunctionClass)) { - HiveUdaf(functionClassName, children) + HiveUdaf(new HiveFunctionCache(functionClassName), children) } else if (classOf[GenericUDTF].isAssignableFrom(functionInfo.getFunctionClass)) { - HiveGenericUdtf(functionClassName, Nil, children) + HiveGenericUdtf(new HiveFunctionCache(functionClassName), Nil, children) } else { sys.error(s"No handler for udf ${functionInfo.getFunctionClass}") } } } -private[hive] trait HiveFunctionFactory { - val functionClassName: String - - def createFunction[UDFType]() = - getContextOrSparkClassLoader.loadClass(functionClassName).newInstance.asInstanceOf[UDFType] -} - -private[hive] abstract class HiveUdf extends Expression with Logging with HiveFunctionFactory { - self: Product => +/** + * This class provides the UDF creation and also the UDF instance serialization and + * de-serialization cross process boundary. 
+ * + * We use a class instead of a trait because property variables of a trait do not seem to be serialized when + * bundled with a case class; on the other hand, we need to intercept the UDF instance ser/de, + * so "has-a" is probably better than "is-a". + * @param functionClassName UDF class name + */ +class HiveFunctionCache(var functionClassName: String) extends java.io.Externalizable { --- End diff -- Another comment, related to `HiveShim`. I was thinking of moving this class, rather than the `de/serializePlan` methods, into the shim layer. Hive 0.12.0 is not affected by SPARK-4785, so the version in the 0.12.0 shim can be very simple; we only need to handle 0.13.1 there. This also lowers the possibility of breaking 0.12.0 code paths. Also, after moving this class into the shim layer, as I've mentioned in another comment, instead of relying on `de/serializePlan`, we can just mimic `Utilities.de/serializeObjectByKryo` in the `read/writeExternal` methods of this class.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes to do so, or if the feature is enabled but not working, please contact infrastructure at infrastructure@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org