LuciferYang commented on code in PR #55498:
URL: https://github.com/apache/spark/pull/55498#discussion_r3131123226
##########
sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MapLookupBenchmark.scala:
##########
@@ -165,11 +188,21 @@ object MapLookupBenchmark extends SqlBasedBenchmark {
}
override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
- val sizes = Seq(1000000, 100000, 10000, 1000, 100, 10, 1)
- for (size <- sizes) {
+ // Three sizes spanning three orders of magnitude: 10000 is well above the default
+ // threshold (hash wins clearly), 1000 is at the threshold, and 10 is well below it
+ // (hash overhead dominates -- justifies the threshold default). Upper bound is capped
+ // because task serialization of a 1M-entry literal exceeds sbt's default 8g heap.
+ for (size <- Seq(10000, 1000, 10)) {
Review Comment:
fine for me
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]