This is an automated email from the ASF dual-hosted git repository. yao pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push: new e98872fb5d07 [SPARK-46747][SQL] Avoid scan in getTableExistsQuery for JDBC Dialects e98872fb5d07 is described below commit e98872fb5d07d570e6d0516b49a5d2e58876d1a6 Author: Kent Yao <y...@apache.org> AuthorDate: Tue Jan 30 20:34:28 2024 +0800 [SPARK-46747][SQL] Avoid scan in getTableExistsQuery for JDBC Dialects ### What changes were proposed in this pull request? [SPARK-46747](https://issues.apache.org/jira/browse/SPARK-46747) reported an issue that Postgres instances suffered from too many shared locks, which was caused by Sparkās get table exist query. In this PR, we supplanted `"SELECT 1 FROM $table LIMIT 1"` with `"SELECT 1 FROM $table WHERE 1=0"` to prevent data from being scanned. ### Why are the changes needed? overhead reduction for JDBC datasources ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing JDBC v1/v2 datasouce tests. ### Was this patch authored or co-authored using generative AI tooling? no Closes #44948 from yaooqinn/SPARK-46747. Authored-by: Kent Yao <y...@apache.org> Signed-off-by: Kent Yao <y...@apache.org> (cherry picked from commit 031df8fa62666f14f54cf0a792f7fa2acc43afee) Signed-off-by: Kent Yao <y...@apache.org> --- .../src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala | 2 +- .../src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala | 4 ---- .../src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala | 4 ---- sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 7 +++---- 4 files changed, 4 insertions(+), 13 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 1e65542946af..0cd46dda62c3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -141,7 +141,7 @@ abstract class JdbcDialect extends Serializable with Logging{ * @return The SQL query to use for checking the table. */ def getTableExistsQuery(table: String): String = { - s"SELECT * FROM $table WHERE 1=0" + s"SELECT 1 FROM $table WHERE 1=0" } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index 24f9bac74f86..73903d65b01a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -94,10 +94,6 @@ private case object MySQLDialect extends JdbcDialect with SQLConfHelper { schemaBuilder.result } - override def getTableExistsQuery(table: String): String = { - s"SELECT 1 FROM $table LIMIT 1" - } - override def isCascadingTruncateTable(): Option[Boolean] = Some(false) // See https://dev.mysql.com/doc/refman/8.0/en/alter-table.html diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index a668d66ee2f9..6de4d6ec71a8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -141,10 +141,6 @@ private object PostgresDialect extends JdbcDialect with SQLConfHelper { case _ => None } - override def getTableExistsQuery(table: String): String = { - s"SELECT 1 FROM $table LIMIT 1" - } - override def isCascadingTruncateTable(): Option[Boolean] = Some(false) /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index a222391c06fb..74b85f18ceef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -989,10 +989,9 @@ class JDBCSuite extends QueryTest val h2 = JdbcDialects.get(url) val derby = JdbcDialects.get("jdbc:derby:db") val table = "weblogs" - val defaultQuery = s"SELECT * FROM $table WHERE 1=0" - val limitQuery = s"SELECT 1 FROM $table LIMIT 1" - assert(MySQL.getTableExistsQuery(table) == limitQuery) - assert(Postgres.getTableExistsQuery(table) == limitQuery) + val defaultQuery = s"SELECT 1 FROM $table WHERE 1=0" + assert(MySQL.getTableExistsQuery(table) == defaultQuery) + assert(Postgres.getTableExistsQuery(table) == defaultQuery) assert(db2.getTableExistsQuery(table) == defaultQuery) assert(h2.getTableExistsQuery(table) == defaultQuery) assert(derby.getTableExistsQuery(table) == defaultQuery) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org