This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new f61b31a [SPARK-31916][SQL] StringConcat can lead to StringIndexOutOfBoundsException f61b31a is described below commit f61b31a5a484c7e90920ec36c456594ce92cdf73 Author: Dilip Biswal <dkbis...@gmail.com> AuthorDate: Fri Jun 12 09:19:29 2020 +0900 [SPARK-31916][SQL] StringConcat can lead to StringIndexOutOfBoundsException ### What changes were proposed in this pull request? A minor fix to the append method of StringConcat to cap the length at MAX_ROUNDED_ARRAY_LENGTH to make sure it does not overflow and cause StringIndexOutOfBoundsException. Thanks to **Jeffrey Stokes** for reporting the issue and explaining the underlying problem in detail in the JIRA. ### Why are the changes needed? This fixes StringIndexOutOfBoundsException on an overflow. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added a test in StringUtilsSuite. Closes #28750 from dilipbiswal/SPARK-31916. Authored-by: Dilip Biswal <dkbis...@gmail.com> Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> (cherry picked from commit b87a342c7dd51046fcbe323db640c825646fb8d4) Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> --- .../spark/sql/catalyst/util/StringUtils.scala | 6 +++- .../spark/sql/catalyst/util/StringUtilsSuite.scala | 32 +++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala index b42ae4e..2a416d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -123,7 +123,11 @@ object StringUtils extends Logging { val stringToAppend = if (available >= sLen) s else s.substring(0, available) strings.append(stringToAppend) } - length += sLen + + // Keeps the total 
length of appended strings. Note that we need to cap the length at + // `ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH`; otherwise, we will overflow + // length causing StringIndexOutOfBoundsException in the substring call above. + length = Math.min(length.toLong + sLen, ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH).toInt } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala index 67bc4bc..c68e89fc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala @@ -18,9 +18,11 @@ package org.apache.spark.sql.catalyst.util import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.util.StringUtils._ +import org.apache.spark.sql.internal.SQLConf -class StringUtilsSuite extends SparkFunSuite { +class StringUtilsSuite extends SparkFunSuite with SQLHelper { test("escapeLikeRegex") { val expectedEscapedStrOne = "(?s)\\Qa\\E\\Qb\\E\\Qd\\E\\Qe\\E\\Qf\\E" @@ -98,4 +100,32 @@ class StringUtilsSuite extends SparkFunSuite { assert(checkLimit("1234567")) assert(checkLimit("1234567890")) } + + test("SPARK-31916: StringConcat doesn't overflow on many inputs") { + val concat = new StringConcat(maxLength = 100) + val stringToAppend = "Test internal index of StringConcat does not overflow with many " + + "append calls" + 0.to((Integer.MAX_VALUE / stringToAppend.length) + 1).foreach { _ => + concat.append(stringToAppend) + } + assert(concat.toString.length === 100) + } + + test("SPARK-31916: verify that PlanStringConcat's output shows the actual length of the plan") { + withSQLConf(SQLConf.MAX_PLAN_STRING_LENGTH.key -> "0") { + val concat = new PlanStringConcat() + 0.to(3).foreach { i => + concat.append(s"plan fragment $i") + } + assert(concat.toString === 
"Truncated plan of 60 characters") + } + + withSQLConf(SQLConf.MAX_PLAN_STRING_LENGTH.key -> "60") { + val concat = new PlanStringConcat() + 0.to(2).foreach { i => + concat.append(s"plan fragment $i") + } + assert(concat.toString === "plan fragment 0plan fragment 1... 15 more characters") + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org