This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 91d11568c2eb [SPARK-47440][SQL] Fix pushing unsupported syntax to MsSqlServer
91d11568c2eb is described below

commit 91d11568c2ebd937e0ea6de892649d9115035410
Author: Stefan Bukorovic <stefan.bukoro...@databricks.com>
AuthorDate: Fri Mar 22 18:44:30 2024 +0800

[SPARK-47440][SQL] Fix pushing unsupported syntax to MsSqlServer

### What changes were proposed in this pull request?
In this PR, I propose a change to the SQL query builder of the MsSqlServer dialect. I override the build method to check for boolean operands in binary comparisons and throw an exception if one is encountered.

### Why are the changes needed?
MsSqlServer syntax does not allow boolean expressions as operands of a binary comparison, because MsSqlServer has no boolean data type. It was possible to construct a Spark query that pushed such a comparison down to MsSqlServer, and the engine would then throw a syntax exception on the MsSqlServer side. This PR fixes that bug.

For example, suppose table `people` has a `name` column. If we try to execute `SELECT * FROM people WHERE (name LIKE 'a%') = (name LIKE '%b')` in MsSqlServer, we get a syntax error, even though the query is accepted by other major engines.

### Does this PR introduce _any_ user-facing change?
Yes, users will no longer encounter a syntax exception from MsSqlServer when writing such queries.

### How was this patch tested?
By running a unit test in MsSqlServerIntegrationSuite.

### Was this patch authored or co-authored using generative AI tooling?
No
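For context, the problematic shape can be produced end-to-end from Spark SQL over the V2 JDBC catalog. Below is a minimal sketch, assuming a hypothetical catalog named `mssql`, illustrative connection settings, and a `people` table on the SQL Server side; none of these names come from this patch.

```scala
import org.apache.spark.sql.SparkSession

object MsSqlServerPushdownSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("mssql-pushdown-sketch")
      // Hypothetical catalog name and connection settings; credentials omitted.
      .config("spark.sql.catalog.mssql",
        "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog")
      .config("spark.sql.catalog.mssql.url",
        "jdbc:sqlserver://localhost:1433;databaseName=testdb")
      .config("spark.sql.catalog.mssql.driver",
        "com.microsoft.sqlserver.jdbc.SQLServerDriver")
      .getOrCreate()

    // Valid Spark SQL, but T-SQL has no boolean data type, so this comparison sent
    // verbatim to SQL Server fails with a syntax error. Before this fix the predicate
    // could be pushed down as-is; with it, the dialect refuses to compile the
    // expression and Spark evaluates the filter itself.
    spark.sql(
      "SELECT * FROM mssql.dbo.people WHERE (name LIKE 'a%') = (name LIKE '%b')"
    ).show()

    spark.stop()
  }
}
```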
Closes #45564 from stefanbuk-db/SQLServer_like_operator_bugfix.

Authored-by: Stefan Bukorovic <stefan.bukoro...@databricks.com>
Signed-off-by: Kent Yao <y...@apache.org>
(cherry picked from commit 227a50a1766ac1476b0031e1c60d2604eccdb9a7)
Signed-off-by: Kent Yao <y...@apache.org>
---
 .../spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala  | 16 ++++++++++++++++
 .../org/apache/spark/sql/jdbc/MsSqlServerDialect.scala   | 15 +++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala
index e451cc2b8c52..0dc3a39f4db5 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala
@@ -127,4 +127,20 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JD
       },
       errorClass = "_LEGACY_ERROR_TEMP_2271")
   }
+
+  test("SPARK-47440: SQLServer does not support boolean expression in binary comparison") {
+    val df1 = sql("SELECT name FROM " +
+      s"$catalogName.employee WHERE ((name LIKE 'am%') = (name LIKE '%y'))")
+    assert(df1.collect().length == 4)
+
+    val df2 = sql("SELECT name FROM " +
+      s"$catalogName.employee " +
+      "WHERE ((name NOT LIKE 'am%') = (name NOT LIKE '%y'))")
+    assert(df2.collect().length == 4)
+
+    val df3 = sql("SELECT name FROM " +
+      s"$catalogName.employee " +
+      "WHERE (dept > 1 AND ((name LIKE 'am%') = (name LIKE '%y')))")
+    assert(df3.collect().length == 3)
+  }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
index 78ec3ac42d79..3022bca87a9f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.analysis.NonEmptyNamespaceException
 import org.apache.spark.sql.connector.catalog.Identifier
 import org.apache.spark.sql.connector.expressions.{Expression, NullOrdering, SortDirection}
+import org.apache.spark.sql.connector.expressions.filter.Predicate
 import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
 import org.apache.spark.sql.internal.SQLConf
@@ -86,6 +87,20 @@ private object MsSqlServerDialect extends JdbcDialect {
       case "STDDEV_SAMP" => "STDEV"
       case _ => super.dialectFunctionName(funcName)
     }
+
+    override def build(expr: Expression): String = {
+      // MsSqlServer does not support boolean comparison using standard comparison operators
+      // We shouldn't propagate these queries to MsSqlServer
+      expr match {
+        case e: Predicate => e.name() match {
+          case "=" | "<>" | "<=>" | "<" | "<=" | ">" | ">="
+            if e.children().exists(_.isInstanceOf[Predicate]) =>
+            super.visitUnexpectedExpr(expr)
+          case _ => super.build(expr)
+        }
+        case _ => super.build(expr)
+      }
+    }
   }
 
   override def compileExpression(expr: Expression): Option[String] = {
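The trailing context line above shows the dialect's `compileExpression` override. Throwing from `build` (via `visitUnexpectedExpr`) is a safe way to reject the expression because `compileExpression` treats a failed compilation as not pushable: it reports no SQL for the predicate, and Spark applies the filter to the fetched rows itself, so no invalid T-SQL reaches SQL Server. A rough sketch of that fallback pattern, assuming the usual `JdbcDialect` behavior rather than quoting Spark's source:

```scala
import scala.util.control.NonFatal

import org.apache.spark.sql.connector.expressions.Expression

object PushdownFallbackSketch {
  // If the SQL builder throws (e.g. from visitUnexpectedExpr on a boolean operand of
  // '=', '<', ...), the expression is reported as not compilable and is not pushed
  // down; Spark then filters the fetched rows, so no invalid T-SQL is sent.
  def compileOrSkip(buildSql: Expression => String)(expr: Expression): Option[String] = {
    try {
      Some(buildSql(expr)) // e.g. the overridden build() added in this patch
    } catch {
      case NonFatal(_) => None // not pushed down
    }
  }
}
```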