This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push: new 60bd91f257f [SPARK-40247][SQL] Fix BitSet equality check 60bd91f257f is described below commit 60bd91f257f601985de144fde84a019327cf23f2 Author: Peter Toth <pt...@cloudera.com> AuthorDate: Mon Aug 29 15:25:39 2022 +0800 [SPARK-40247][SQL] Fix BitSet equality check ### What changes were proposed in this pull request? Spark's `BitSet` doesn't implement `equals()` and `hashCode()`, but it is used in `FileSourceScanExec` for bucket pruning. ### Why are the changes needed? Without a proper equality check, reuse issues can occur. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added new UT. Closes #37696 from peter-toth/SPARK-40247-fix-bitset-equals. Authored-by: Peter Toth <pt...@cloudera.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 527ddece8fdbe703dcd239401c97ddb2c6122182) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../org/apache/spark/util/collection/BitSet.scala | 9 ++++++++ .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 25 ++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala index 61386114997..6bb5058f5ed 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala @@ -250,4 +250,13 @@ class BitSet(numBits: Int) extends Serializable { /** Return the number of longs it would take to hold numBits. 
*/ private def bit2words(numBits: Int) = ((numBits - 1) >> 6) + 1 + + override def equals(other: Any): Boolean = other match { + case otherSet: BitSet => Arrays.equals(words, otherSet.words) + case _ => false + } + + override def hashCode(): Int = { + Arrays.hashCode(words) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b0f2421d897..6ed6a85b8d1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4085,6 +4085,31 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } + test("SPARK-40247: Fix BitSet equals") { + withTable("td") { + testData + .withColumn("bucket", $"key" % 3) + .write + .mode(SaveMode.Overwrite) + .bucketBy(2, "bucket") + .format("parquet") + .saveAsTable("td") + val df = sql( + """ + |SELECT t1.key, t2.key, t3.key + |FROM td AS t1 + |JOIN td AS t2 ON t2.key = t1.key + |JOIN td AS t3 ON t3.key = t2.key + |WHERE t1.bucket = 1 AND t2.bucket = 1 AND t3.bucket = 1 + |""".stripMargin) + df.collect() + val reusedExchanges = collect(df.queryExecution.executedPlan) { + case r: ReusedExchangeExec => r + } + assert(reusedExchanges.size == 1) + } + } + test("SPARK-35331: Fix resolving original expression in RepartitionByExpression after aliased") { Seq("CLUSTER", "DISTRIBUTE").foreach { keyword => Seq("a", "substr(a, 0, 3)").foreach { expr => --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org