Repository: spark Updated Branches: refs/heads/branch-2.0 c8628e877 -> 6e7310590
[SPARK-18368][SQL] Fix regexp replace when serialized ## What changes were proposed in this pull request? This makes the `result` value both transient and lazy, so that if a RegExpReplace object is initialized and then serialized, `result: StringBuffer` is correctly re-initialized on first use after deserialization (a plain `@transient val` is not serialized and is left null in the deserialized copy). ## How was this patch tested? * Verified that this patch fixed the query that found the bug. * Added a test case that fails without the fix. Author: Ryan Blue <b...@apache.org> Closes #15834 from rdblue/SPARK-18368-fix-regexp-replace. (cherry picked from commit d4028de97687385fa1d1eb6301eb544c0ea4a135) Signed-off-by: Yin Huai <yh...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6e731059 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6e731059 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6e731059 Branch: refs/heads/branch-2.0 Commit: 6e73105904a7bae0f7c9b1bebcb83d5ba8265956 Parents: c8628e8 Author: Ryan Blue <b...@apache.org> Authored: Wed Nov 9 11:00:53 2016 -0800 Committer: Yin Huai <yh...@databricks.com> Committed: Wed Nov 9 11:01:45 2016 -0800 ---------------------------------------------------------------------- .../catalyst/expressions/regexpExpressions.scala | 2 +- .../expressions/RegexpExpressionsSuite.scala | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/6e731059/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index d25da3f..f6a55cf 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -220,7 +220,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio @transient private var lastReplacement: String = _ @transient private var lastReplacementInUTF8: UTF8String = _ // result buffer write by Matcher - @transient private val result: StringBuffer = new StringBuffer + @transient private lazy val result: StringBuffer = new StringBuffer override def nullSafeEval(s: Any, p: Any, r: Any): Any = { if (!p.equals(lastRegex)) { http://git-wip-us.apache.org/repos/asf/spark/blob/6e731059/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 5299549..d0d1aaa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.serializer.JavaSerializer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.types.StringType @@ -191,4 +192,17 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(StringSplit(s1, s2), null, row3) } + test("RegExpReplace serialization") { + val serializer = new JavaSerializer(new SparkConf()).newInstance + + val row = create_row("abc", "b", "") + + val s = 's.string.at(0) + val p = 
'p.string.at(1) + val r = 'r.string.at(2) + + val expr: RegExpReplace = serializer.deserialize(serializer.serialize(RegExpReplace(s, p, r))) + checkEvaluation(expr, "ac", row) + } + } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org