[ https://issues.apache.org/jira/browse/SPARK-29211?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Dilip Biswal updated SPARK-29211:
---------------------------------
    Description: 
I encountered this while writing documentation for the SQL reference. Here is a small repro:

UDF:
=====
{code:java}
import org.apache.hadoop.hive.ql.exec.UDF;
  
public class SimpleUdf extends UDF {
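  // evaluate() is resolved by reflection at call time; it returns its input plus 10.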
  public int evaluate(int value) {
    return value + 10;
  }
}
{code}
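
The table t1 used in the repro below is not defined in the report. A minimal setup consistent with the output (the UDF adds 10, so c1 presumably holds 1 and 2) might look like the following sketch; the table name comes from the repro, but the column type and table format are assumptions:
{code:java}
// Hypothetical setup, inferred from the 11/12 output below.
// The USING parquet clause is an assumption; any format would do.
spark.sql("CREATE TABLE t1 (c1 INT) USING parquet")
spark.sql("INSERT INTO t1 VALUES (1), (2)")
{code}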
{code:java}
scala> spark.sql("CREATE FUNCTION simple_udf AS 'SimpleUdf' USING JAR '/tmp/SimpleUdf.jar'").show

scala> spark.sql("SELECT simple_udf(c1) AS function_return_value FROM t1").show
+---------------------+
|function_return_value|
+---------------------+
|                   11|
|                   12|
+---------------------+

scala> spark.sql("SELECT simple_udf(c1) AS function_return_value FROM t1").show
19/09/23 00:43:18 WARN HiveConf: HiveConf of name hive.internal.ss.authz.settings.applied.marker does not exist
19/09/23 00:43:18 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist
19/09/23 00:43:18 WARN HiveConf: HiveConf of name hive.stats.retries.wait does not exist
org.apache.spark.sql.AnalysisException: No handler for UDF/UDAF/UDTF 'SimpleUdf': java.lang.ClassNotFoundException: SimpleUdf; line 1 pos 7
  at scala.reflect.internal.util.AbstractFileClassLoader.findClass(AbstractFileClassLoader.scala:72)
  at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
  at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
  at org.apache.spark.sql.hive.HiveShim$HiveFunctionWrapper.createFunction(HiveShim.scala:245)
  at org.apache.spark.sql.hive.HiveSimpleUDF.function$lzycompute(hiveUDFs.scala:57)
  at org.apache.spark.sql.hive.HiveSimpleUDF.function(hiveUDFs.scala:57)
  at org.apache.spark.sql.hive.HiveSimpleUDF.method$lzycompute(hiveUDFs.scala:61)
  at org.apache.spark.sql.hive.HiveSimpleUDF.method(hiveUDFs.scala:60)
  at org.apache.spark.sql.hive.HiveSimpleUDF.dataType$lzycompute(hiveUDFs.scala:78)
  at org.apache.spark.sql.hive.HiveSimpleUDF.dataType(hiveUDFs.scala:78)
  at org.apache.spark.sql.hive.HiveSessionCatalog.$anonfun$makeFunctionExpression$2(HiveSessionCatalog.scala:78)
  at scala.util.Failure.getOrElse(Try.scala:222)
  at org.apache.spark.sql.hive.HiveSessionCatalog.makeFunctionExpression(HiveSessionCatalog.scala:70)
  at org.apache.spark.sql.catalyst.catalog.SessionCatalog.$anonfun$makeFunctionBuilder$1(SessionCatalog.scala:1176)
  at org.apache.spark.sql.catalyst.analysis.SimpleFunctionRegistry.lookupFunction(FunctionRegistry.scala:121)
  at org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction(SessionCatalog.scala:1344)
  at org.apache.spark.sql.hive.HiveSessionCatalog.super$lookupFunction(HiveSessionCatalog.scala:132)
  at org.apache.spark.sql.hive.HiveSessionCatalog.$anonfun$lookupFunction0$2(HiveSessionCatalog.scala:132)
{code}
The first invocation resolves the class and returns correct results; on the second invocation the function lookup fails with the ClassNotFoundException shown above. Note that the problem does not occur when the same steps run from a test suite; so far I have only seen it from the shell. I also tried 2.4.4 and observed the same behaviour.
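
For comparison, here is a hedged sketch of a way to sidestep the USING JAR classloader path (not verified to avoid the bug): put the jar on the shell's classpath at startup and register the function without USING JAR.
{code:java}
// Hypothetical comparison run, assuming the same /tmp/SimpleUdf.jar:
//   $ spark-shell --jars /tmp/SimpleUdf.jar
spark.sql("CREATE FUNCTION simple_udf AS 'SimpleUdf'")
spark.sql("SELECT simple_udf(c1) AS function_return_value FROM t1").show
{code}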

> Second invocation of custom UDF results in exception (when invoked from shell)
> ------------------------------------------------------------------------------
>
>                 Key: SPARK-29211
>                 URL: https://issues.apache.org/jira/browse/SPARK-29211
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.4.4
>            Reporter: Dilip Biswal
>            Priority: Major
>


