(spark) branch master updated: [SPARK-47440][SQL] Fix pushing unsupported syntax to MsSqlServer

yao Fri, 22 Mar 2024 03:45:02 -0700

This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 227a50a1766a [SPARK-47440][SQL] Fix pushing unsupported syntax to 
MsSqlServer
227a50a1766a is described below

commit 227a50a1766ac1476b0031e1c60d2604eccdb9a7
Author: Stefan Bukorovic <stefan.bukoro...@databricks.com>
AuthorDate: Fri Mar 22 18:44:30 2024 +0800

    [SPARK-47440][SQL] Fix pushing unsupported syntax to MsSqlServer
    
    ### What changes were proposed in this pull request?
    In this PR, I propose a change to the SQL query builder of the MsSqlServer
dialect. I override the `build` method to check for boolean expressions used as
operands of binary comparisons and throw an exception if one is encountered.
    
    ### Why are the changes needed?
    MsSqlServer syntax does not allow boolean expressions as operands of a
binary comparison. The reason is the lack of a boolean data type in MsSqlServer.
    It was possible to construct a Spark query that would generate this
situation in MsSqlServer, and the engine would throw a syntax exception on the
MsSqlServer side. This PR fixes this bug.
    For example, in table `people` there is a `name` column. In MsSqlServer if 
we try to execute:
    `SELECT * FROM people WHERE (name LIKE 'a%') = (name LIKE '%b')`
    we would get a syntax error. However, this query is fine in other major
engines.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, users will no longer encounter a syntax exception in MsSqlServer when
writing these queries.
    
    ### How was this patch tested?
    By running a unit test in MsSqlServerIntegrationSuite
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #45564 from stefanbuk-db/SQLServer_like_operator_bugfix.
    
    Authored-by: Stefan Bukorovic <stefan.bukoro...@databricks.com>
    Signed-off-by: Kent Yao <y...@apache.org>
---
 .../spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala  | 16 ++++++++++++++++
 .../org/apache/spark/sql/jdbc/MsSqlServerDialect.scala   | 15 +++++++++++++++
 2 files changed, 31 insertions(+)

diff --git 
a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala
 
b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala
index e451cc2b8c52..0dc3a39f4db5 100644
--- 
a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala
+++ 
b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala
@@ -127,4 +127,20 @@ class MsSqlServerIntegrationSuite extends 
DockerJDBCIntegrationV2Suite with V2JD
       },
       errorClass = "_LEGACY_ERROR_TEMP_2271")
   }
+
+  test("SPARK-47440: SQLServer does not support boolean expression in binary 
comparison") {
+    val df1 = sql("SELECT name FROM " +
+      s"$catalogName.employee WHERE ((name LIKE 'am%') = (name LIKE '%y'))")
+    assert(df1.collect().length == 4)
+
+    val df2 = sql("SELECT name FROM " +
+      s"$catalogName.employee " +
+      "WHERE ((name NOT LIKE 'am%') = (name NOT LIKE '%y'))")
+    assert(df2.collect().length == 4)
+
+    val df3 = sql("SELECT name FROM " +
+      s"$catalogName.employee " +
+      "WHERE (dept > 1 AND ((name LIKE 'am%') = (name LIKE '%y')))")
+    assert(df3.collect().length == 3)
+  }
 }
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
index 1b6dc1af9ec0..862e99adc3b0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.analysis.NonEmptyNamespaceException
 import org.apache.spark.sql.connector.catalog.Identifier
 import org.apache.spark.sql.connector.expressions.{Expression, NullOrdering, 
SortDirection}
+import org.apache.spark.sql.connector.expressions.filter.Predicate
 import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
 import org.apache.spark.sql.internal.SQLConf
@@ -79,6 +80,20 @@ private case class MsSqlServerDialect() extends JdbcDialect {
       case "STDDEV_SAMP" => "STDEV"
       case _ => super.dialectFunctionName(funcName)
     }
+
+    override def build(expr: Expression): String = {
+      // MsSqlServer does not support boolean comparison using standard 
comparison operators
+      // We shouldn't propagate these queries to MsSqlServer
+      expr match {
+        case e: Predicate => e.name() match {
+          case "=" | "<>" | "<=>" | "<" | "<=" | ">" | ">="
+              if e.children().exists(_.isInstanceOf[Predicate]) =>
+            super.visitUnexpectedExpr(expr)
+          case _ => super.build(expr)
+        }
+        case _ => super.build(expr)
+      }
+    }
   }
 
   override def compileExpression(expr: Expression): Option[String] = {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

(spark) branch master updated: [SPARK-47440][SQL] Fix pushing unsupported syntax to MsSqlServer

Reply via email to