This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 30c6802574e [SPARK-38889][SQL] Compile boolean column filters to use 
the bit type for MSSQL data source
30c6802574e is described below

commit 30c6802574e5993e6f0f10d4c189c6e8325bcc5c
Author: allisonwang-db <allison.w...@databricks.com>
AuthorDate: Thu Apr 14 13:11:00 2022 +0900

    [SPARK-38889][SQL] Compile boolean column filters to use the bit type for 
MSSQL data source
    
    ### What changes were proposed in this pull request?
    This PR compiles the boolean data type to the bit data type for pushed 
column filters while querying the MSSQL data source. Microsoft SQL Server does 
not support the boolean type, so the JDBC dialect should use the bit data type 
instead.
    
    ### Why are the changes needed?
    
    To fix a bug that was exposed by the boolean column filter pushdown to the 
SQL Server data source.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    Added a new integration test.
    
    Closes #36182 from allisonwang-db/spark-38889-mssql-predicate-pushdown.
    
    Authored-by: allisonwang-db <allison.w...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
    (cherry picked from commit 320f88d54440e05228a90ef5663991e28ae07c95)
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../spark/sql/jdbc/MsSqlServerIntegrationSuite.scala    | 17 +++++++++++++++++
 .../org/apache/spark/sql/jdbc/MsSqlServerDialect.scala  | 10 ++++++++++
 2 files changed, 27 insertions(+)

diff --git 
a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala
 
b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala
index 5992253a958..e293f9a8f7b 100644
--- 
a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala
+++ 
b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala
@@ -22,6 +22,7 @@ import java.sql.{Connection, Date, Timestamp}
 import java.util.Properties
 
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
+import org.apache.spark.sql.functions.col
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.tags.DockerTest
 
@@ -140,6 +141,14 @@ class MsSqlServerIntegrationSuite extends 
DockerJDBCIntegrationSuite {
         |'MULTIPOLYGON(((2 2, 2 -2, -2 -2, -2 2, 2 2)),((1 1, 3 1, 3 3, 1 3, 1 
1)))',
         |'GEOMETRYCOLLECTION(LINESTRING(1 1, 3 5),POLYGON((-1 -1, -1 -5, -5 
-5, -5 -1, -1 -1)))')
       """.stripMargin).executeUpdate()
+    conn.prepareStatement(
+      """
+        |CREATE TABLE bits(a INT, b INT, c BIT)
+        |""".stripMargin).executeUpdate()
+    conn.prepareStatement(
+      """
+        |INSERT INTO bits VALUES (1, 2, 1)
+      """.stripMargin).executeUpdate()
   }
 
   test("Basic test") {
@@ -357,4 +366,12 @@ class MsSqlServerIntegrationSuite extends 
DockerJDBCIntegrationSuite {
         0, 3, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 
2, 0,
         0, 0, 0, 1, 0, 0, 0, 3))
   }
+
+  test("SPARK-38889: MsSqlServerDialect should handle boolean filter push 
down") {
+    val df = spark.read.jdbc(jdbcUrl, "bits", new Properties)
+    val rows = df.collect()
+    assert(rows.length == 1)
+    val filtered = df.where(col("c") === 0).collect()
+    assert(filtered.length == 0)
+  }
 }
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
index 8d2fbec55f9..a42129dbe8d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
@@ -40,6 +40,16 @@ private object MsSqlServerDialect extends JdbcDialect {
   override def canHandle(url: String): Boolean =
     url.toLowerCase(Locale.ROOT).startsWith("jdbc:sqlserver")
 
+  // Microsoft SQL Server does not have the boolean type.
+  // Compile the boolean value to the bit data type instead.
+  // scalastyle:off line.size.limit
+  // See 
https://docs.microsoft.com/en-us/sql/t-sql/data-types/data-types-transact-sql?view=sql-server-ver15
+  // scalastyle:on line.size.limit
+  override def compileValue(value: Any): Any = value match {
+    case booleanValue: Boolean => if (booleanValue) 1 else 0
+    case other => super.compileValue(other)
+  }
+
   // scalastyle:off line.size.limit
   // See 
https://docs.microsoft.com/en-us/sql/t-sql/functions/aggregate-functions-transact-sql?view=sql-server-ver15
   // scalastyle:on line.size.limit


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to