[ https://issues.apache.org/jira/browse/SPARK-18436?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Herman van Hovell updated SPARK-18436:
--------------------------------------
    Description: 
When using a JDBC data source, the "isin" function generates invalid SQL syntax when called with an empty array: the filter appears to be pushed down as an IN clause with an empty value list, which the JDBC driver rejects (note the "Incorrect syntax near ')'" error below). If the array is not empty, it works fine.

In the example below, assume that SOURCE_CONNECTION, SQL_DRIVER, and TABLE are all correctly defined.
{noformat}
scala> val filter = Array[String]()
filter: Array[String] = Array()

scala> val sortDF = spark.read.format("jdbc").options(Map("url" -> SOURCE_CONNECTION, "driver" -> SQL_DRIVER, "dbtable" -> TABLE)).load().filter($"cl_ult".isin(filter:_*))
sortDF: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [ibi_bulk_id: bigint, ibi_row_id: int ... 174 more fields]

scala> sortDF.show()
16/11/14 15:35:46 ERROR Executor: Exception in task 0.0 in stage 6.0 (TID 205)
com.microsoft.sqlserver.jdbc.SQLServerException: Incorrect syntax near ')'.
        at com.microsoft.sqlserver.jdbc.SQLServerException.makeFromDatabaseError(SQLServerException.java:216)
        at com.microsoft.sqlserver.jdbc.SQLServerStatement.getNextResult(SQLServerStatement.java:1515)
        at com.microsoft.sqlserver.jdbc.SQLServerPreparedStatement.doExecutePreparedStatement(SQLServerPreparedStatement.java:404)
        at com.microsoft.sqlserver.jdbc.SQLServerPreparedStatement$PrepStmtExecCmd.doExecute(SQLServerPreparedStatement.java:350)
        at com.microsoft.sqlserver.jdbc.TDSCommand.execute(IOBuffer.java:5696)
        at com.microsoft.sqlserver.jdbc.SQLServerConnection.executeCommand(SQLServerConnection.java:1715)
        at com.microsoft.sqlserver.jdbc.SQLServerStatement.executeCommand(SQLServerStatement.java:180)
        at com.microsoft.sqlserver.jdbc.SQLServerStatement.executeStatement(SQLServerStatement.java:155)
        at com.microsoft.sqlserver.jdbc.SQLServerPreparedStatement.executeQuery(SQLServerPreparedStatement.java:285)
        at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$$anon$1.<init>(JDBCRDD.scala:408)
        at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD.compute(JDBCRDD.scala:379)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
        at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
        at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
        at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
        at org.apache.spark.scheduler.Task.run(Task.scala:86)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
{noformat}
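
The "Incorrect syntax near ')'" error is consistent with the pushed-down filter being rendered as an empty IN () list. Below is a minimal sketch of the suspected compilation behavior plus a guarded alternative; the object and method names are hypothetical, not the actual Spark source.

{noformat}
// Sketch only: mimics how a JDBC filter compiler might render an In filter.
// compileValue, compileInBuggy and compileInGuarded are hypothetical names.
object InFilterSketch {
  def compileValue(v: Any): String = v match {
    case s: String => "'" + s.replace("'", "''") + "'"  // quote string literals
    case other     => other.toString
  }

  // Suspected current behavior: an empty array yields "cl_ult IN ()",
  // which SQL Server rejects with "Incorrect syntax near ')'".
  def compileInBuggy(attr: String, values: Array[Any]): String =
    s"$attr IN (${values.map(compileValue).mkString(", ")})"

  // Guarded version: an empty IN list can never match, so emit a predicate
  // that is FALSE for non-null values and NULL for NULLs, matching IN semantics.
  def compileInGuarded(attr: String, values: Array[Any]): String =
    if (values.isEmpty) s"CASE WHEN $attr IS NULL THEN NULL ELSE FALSE END"
    else compileInBuggy(attr, values)

  def main(args: Array[String]): Unit = {
    println(compileInBuggy("cl_ult", Array.empty[Any]))   // cl_ult IN ()  <- invalid
    println(compileInGuarded("cl_ult", Array.empty[Any])) // valid, never matches
  }
}
{noformat}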

> isin causing SQL syntax error with JDBC
> ---------------------------------------
>
>                 Key: SPARK-18436
>                 URL: https://issues.apache.org/jira/browse/SPARK-18436
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.0.1
>         Environment: Linux, SQL Server 2012
>            Reporter: Dan
>              Labels: jdbc, sql
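
Until the pushdown is fixed, callers can avoid the bad SQL with a driver-side guard. A workaround sketch, assuming the same SOURCE_CONNECTION, SQL_DRIVER, TABLE, and filter definitions as in the repro above; lit(false) mirrors the semantics of an empty isin, which can never match:

{noformat}
// Workaround sketch (not an official fix): skip the isin pushdown when the
// filter array is empty. An empty IN list can never match any row, so
// filtering on lit(false) preserves the intended "no matches" semantics.
import org.apache.spark.sql.functions.lit

val base = spark.read.format("jdbc")
  .options(Map("url" -> SOURCE_CONNECTION, "driver" -> SQL_DRIVER, "dbtable" -> TABLE))
  .load()

val sortDF =
  if (filter.isEmpty) base.filter(lit(false))      // nothing can match an empty list
  else base.filter($"cl_ult".isin(filter: _*))     // normal path, works today
{noformat}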