[
https://issues.apache.org/jira/browse/SPARK-50793?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Xiaoguang Sun updated SPARK-50793:
----------------------------------
Description:
In MySQL, the type identifiers used in the CAST function differ from the corresponding
type names. The MySQL connector generates SQL that fails when it uses CAST.
These are the steps to reproduce.
# CREATE TABLE test_cast(string_col STRING, short_col SHORT, integer_col
INTEGER, long_col LONG, binary_col BINARY, double_col DOUBLE);
# INSERT INTO test_cast VALUES('0', 0, 0, 0, x'30', 0.0);
# SELECT * FROM test_cast WHERE CAST(string_col AS BINARY) = binary_col;
# SELECT * FROM test_cast WHERE CAST(string_col AS SHORT) = short_col;
# SELECT * FROM test_cast WHERE CAST(string_col AS INTEGER) = integer_col;
# SELECT * FROM test_cast WHERE CAST(string_col AS LONG) = long_col;
# SELECT * FROM test_cast WHERE CAST(short_col AS STRING) = '0';
# SELECT * FROM test_cast WHERE CAST(integer_col AS STRING) = '0';
# SELECT * FROM test_cast WHERE CAST(long_col AS STRING) = '0';
# SELECT * FROM test_cast WHERE CAST(binary_col AS STRING) = '0';
# SELECT * FROM test_cast WHERE CAST(double_col AS STRING) = '0';
{code:java}
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:227)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:96)
at
org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1137)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1146)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.sql.SQLSyntaxErrorException: You have an error in your SQL
syntax; check the manual that corresponds to your MySQL server version for the
right syntax to use near "INTEGER) DAY))) "
at
com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:120)
at
com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:122)
at
com.mysql.cj.jdbc.ClientPreparedStatement.executeInternal(ClientPreparedStatement.java:953)
at
com.mysql.cj.jdbc.ClientPreparedStatement.executeQuery(ClientPreparedStatement.java:1003)
at
org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD.compute(JDBCRDD.scala:287)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
at org.apache.spark.scheduler.Task.run(Task.scala:146)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$6(Executor.scala:658)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:661)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.base/java.lang.Thread.run(Thread.java:1583)
{code}
{code:java}
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:227)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:96)
at
org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1137)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1146)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.sql.SQLSyntaxErrorException: You have an error in your SQL
syntax; check the manual that corresponds to your MySQL server version for the
right syntax to use near "LONGTEXT) AS DATE), INTERVAL 1 DAY))) "
at
com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:120)
at
com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:122)
at
com.mysql.cj.jdbc.ClientPreparedStatement.executeInternal(ClientPreparedStatement.java:953)
at
com.mysql.cj.jdbc.ClientPreparedStatement.executeQuery(ClientPreparedStatement.java:1003)
at
org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD.compute(JDBCRDD.scala:287)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
at org.apache.spark.scheduler.Task.run(Task.scala:146)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$6(Executor.scala:658)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:661)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.base/java.lang.Thread.run(Thread.java:1583)
{code}
was:
In MySQL, the type identifiers used in the CAST function differ from the corresponding
type names. The MySQL connector generates SQL that fails when it uses CAST.
These are the steps to reproduce.
1. CREATE TABLE test_cast(date_col DATE, string_col STRING, short_col SHORT,
integer_col INTEGER, long_col LONG, binary_col BINARY);
2. INSERT INTO test_cast VALUES(CURRENT_DATE(), '0', 0, 0, 0, x'00');
3. SELECT * FROM test_cast WHERE date_col = DATE_ADD(CURRENT_DATE(),
string_col);
4. SELECT * FROM test_cast WHERE date_col = DATE_ADD(CURRENT_DATE(),
CAST(binary_col AS STRING));
5. SELECT * FROM test_cast WHERE date_col = DATE_ADD(CAST(long_col AS STRING),
1);
{code:java}
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:227)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:96)
at
org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1137)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1146)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.sql.SQLSyntaxErrorException: You have an error in your SQL
syntax; check the manual that corresponds to your MySQL server version for the
right syntax to use near "INTEGER) DAY))) "
at
com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:120)
at
com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:122)
at
com.mysql.cj.jdbc.ClientPreparedStatement.executeInternal(ClientPreparedStatement.java:953)
at
com.mysql.cj.jdbc.ClientPreparedStatement.executeQuery(ClientPreparedStatement.java:1003)
at
org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD.compute(JDBCRDD.scala:287)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
at org.apache.spark.scheduler.Task.run(Task.scala:146)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$6(Executor.scala:658)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:661)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.base/java.lang.Thread.run(Thread.java:1583)
{code}
{code:java}
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:227)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:96)
at
org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1137)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1146)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.sql.SQLSyntaxErrorException: You have an error in your SQL
syntax; check the manual that corresponds to your MySQL server version for the
right syntax to use near "LONGTEXT) AS DATE), INTERVAL 1 DAY))) "
at
com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:120)
at
com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:122)
at
com.mysql.cj.jdbc.ClientPreparedStatement.executeInternal(ClientPreparedStatement.java:953)
at
com.mysql.cj.jdbc.ClientPreparedStatement.executeQuery(ClientPreparedStatement.java:1003)
at
org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD.compute(JDBCRDD.scala:287)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
at org.apache.spark.scheduler.Task.run(Task.scala:146)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$6(Executor.scala:658)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:661)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.base/java.lang.Thread.run(Thread.java:1583)
{code}
> MySQL JDBC Connector failed to cast LONGTEXT, SMALLINT, INTEGER, BIGINT and
> BLOB types
> --------------------------------------------------------------------------------------
>
> Key: SPARK-50793
> URL: https://issues.apache.org/jira/browse/SPARK-50793
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 4.0.0, 3.4.4, 3.5.4
> Reporter: Xiaoguang Sun
> Priority: Major
> Labels: pull-request-available
>
> In MySQL, the type identifiers used in the CAST function differ from the corresponding
> type names. The MySQL connector generates SQL that fails when it uses CAST.
> These are the steps to reproduce.
> # CREATE TABLE test_cast(string_col STRING, short_col SHORT, integer_col
> INTEGER, long_col LONG, binary_col BINARY, double_col DOUBLE);
> # INSERT INTO test_cast VALUES('0', 0, 0, 0, x'30', 0.0);
> # SELECT * FROM test_cast WHERE CAST(string_col AS BINARY) = binary_col;
> # SELECT * FROM test_cast WHERE CAST(string_col AS SHORT) = short_col;
> # SELECT * FROM test_cast WHERE CAST(string_col AS INTEGER) = integer_col;
> # SELECT * FROM test_cast WHERE CAST(string_col AS LONG) = long_col;
> # SELECT * FROM test_cast WHERE CAST(short_col AS STRING) = '0';
> # SELECT * FROM test_cast WHERE CAST(integer_col AS STRING) = '0';
> # SELECT * FROM test_cast WHERE CAST(long_col AS STRING) = '0';
> # SELECT * FROM test_cast WHERE CAST(binary_col AS STRING) = '0';
> # SELECT * FROM test_cast WHERE CAST(double_col AS STRING) = '0';
> {code:java}
> at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:227)
> at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:96)
> at
> org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1137)
> at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1146)
> at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: java.sql.SQLSyntaxErrorException: You have an error in your SQL
> syntax; check the manual that corresponds to your MySQL server version for
> the right syntax to use near "INTEGER) DAY))) "
> at
> com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:120)
> at
> com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:122)
> at
> com.mysql.cj.jdbc.ClientPreparedStatement.executeInternal(ClientPreparedStatement.java:953)
> at
> com.mysql.cj.jdbc.ClientPreparedStatement.executeQuery(ClientPreparedStatement.java:1003)
> at
> org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD.compute(JDBCRDD.scala:287)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
> at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
> at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
> at
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
> at org.apache.spark.scheduler.Task.run(Task.scala:146)
> at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$6(Executor.scala:658)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:661)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
> at java.base/java.lang.Thread.run(Thread.java:1583)
> {code}
> {code:java}
> at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:227)
> at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:96)
> at
> org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1137)
> at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1146)
> at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: java.sql.SQLSyntaxErrorException: You have an error in your SQL
> syntax; check the manual that corresponds to your MySQL server version for
> the right syntax to use near "LONGTEXT) AS DATE), INTERVAL 1 DAY))) "
> at
> com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:120)
> at
> com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:122)
> at
> com.mysql.cj.jdbc.ClientPreparedStatement.executeInternal(ClientPreparedStatement.java:953)
> at
> com.mysql.cj.jdbc.ClientPreparedStatement.executeQuery(ClientPreparedStatement.java:1003)
> at
> org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD.compute(JDBCRDD.scala:287)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
> at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
> at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
> at
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
> at org.apache.spark.scheduler.Task.run(Task.scala:146)
> at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$6(Executor.scala:658)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:661)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
> at java.base/java.lang.Thread.run(Thread.java:1583)
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]