This is an automated email from the ASF dual-hosted git repository. yao pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new 64115d9829ec [SPARK-46747][SQL] Avoid scan in getTableExistsQuery for JDBC Dialects 64115d9829ec is described below commit 64115d9829ec881b69ffe44f844845104484a025 Author: Kent Yao <y...@apache.org> AuthorDate: Tue Jan 30 20:34:28 2024 +0800 [SPARK-46747][SQL] Avoid scan in getTableExistsQuery for JDBC Dialects ### What changes were proposed in this pull request? [SPARK-46747](https://issues.apache.org/jira/browse/SPARK-46747) reported an issue that Postgres instances suffered from too many shared locks, which was caused by Sparkās get table exist query. In this PR, we supplanted `"SELECT 1 FROM $table LIMIT 1"` with `"SELECT 1 FROM $table WHERE 1=0"` to prevent data from being scanned. ### Why are the changes needed? overhead reduction for JDBC datasources ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing JDBC v1/v2 datasouce tests. ### Was this patch authored or co-authored using generative AI tooling? no Closes #44948 from yaooqinn/SPARK-46747. Authored-by: Kent Yao <y...@apache.org> Signed-off-by: Kent Yao <y...@apache.org> (cherry picked from commit 031df8fa62666f14f54cf0a792f7fa2acc43afee) Signed-off-by: Kent Yao <y...@apache.org> --- .../src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala | 2 +- .../src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala | 4 ---- .../src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala | 4 ---- sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 7 +++---- 4 files changed, 4 insertions(+), 13 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index c58d8368150b..ab5bfdb7189a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -142,7 +142,7 @@ abstract class JdbcDialect extends Serializable with Logging { * @return The SQL query to use for checking the table. */ def getTableExistsQuery(table: String): String = { - s"SELECT * FROM $table WHERE 1=0" + s"SELECT 1 FROM $table WHERE 1=0" } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index 12882dc8e676..29eb8916bb79 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -107,10 +107,6 @@ private case object MySQLDialect extends JdbcDialect with SQLConfHelper { schemaBuilder.result } - override def getTableExistsQuery(table: String): String = { - s"SELECT 1 FROM $table LIMIT 1" - } - override def isCascadingTruncateTable(): Option[Boolean] = Some(false) // See https://dev.mysql.com/doc/refman/8.0/en/alter-table.html diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index c2ca45d9143a..7f6f8dc00886 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -113,10 +113,6 @@ private object PostgresDialect extends JdbcDialect with SQLConfHelper { case _ => None } - override def getTableExistsQuery(table: String): String = { - s"SELECT 1 FROM $table LIMIT 1" - } - override def isCascadingTruncateTable(): Option[Boolean] = Some(false) /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index aa66fcd53041..7c9306b65f1e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -1009,10 +1009,9 @@ class JDBCSuite extends QueryTest with SharedSparkSession { val h2 = JdbcDialects.get(url) val derby = JdbcDialects.get("jdbc:derby:db") val table = "weblogs" - val defaultQuery = s"SELECT * FROM $table WHERE 1=0" - val limitQuery = s"SELECT 1 FROM $table LIMIT 1" - assert(MySQL.getTableExistsQuery(table) == limitQuery) - assert(Postgres.getTableExistsQuery(table) == limitQuery) + val defaultQuery = s"SELECT 1 FROM $table WHERE 1=0" + assert(MySQL.getTableExistsQuery(table) == defaultQuery) + assert(Postgres.getTableExistsQuery(table) == defaultQuery) assert(db2.getTableExistsQuery(table) == defaultQuery) assert(h2.getTableExistsQuery(table) == defaultQuery) assert(derby.getTableExistsQuery(table) == defaultQuery) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org