spark git commit: [SPARK-16514][SQL] Fix various regex codegen bugs
Repository: spark Updated Branches: refs/heads/branch-1.6 702178d1f -> 9808735e0 [SPARK-16514][SQL] Fix various regex codegen bugs ## What changes were proposed in this pull request? RegexExtract and RegexReplace currently crash on non-nullable input due use of a hard-coded local variable name (e.g. compiles fail with `java.lang.Exception: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 85, Column 26: Redefinition of local variable "m" `). This changes those variables to use fresh names, and also in a few other places. ## How was this patch tested? Unit tests. rxin Author: Eric LiangCloses #14168 from ericl/sc-3906. (cherry picked from commit 1c58fa905b6543d366d00b2e5394dfd633987f6d) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9808735e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9808735e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9808735e Branch: refs/heads/branch-1.6 Commit: 9808735e0ce91c68df4c1ce82c44543995d44aed Parents: 702178d Author: Eric Liang Authored: Tue Jul 12 23:09:02 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 12 23:09:31 2016 -0700 -- .../expressions/regexpExpressions.scala | 48 ++-- .../expressions/StringExpressionsSuite.scala| 6 +++ 2 files changed, 39 insertions(+), 15 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9808735e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 9e484c5..154c7a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -104,10 +104,11 @@ case class Like(left: Expression, right: Expression) """ } } else { + val rightStr = ctx.freshName("rightStr") nullSafeCodeGen(ctx, ev, (eval1, eval2) => { s""" - String rightStr = ${eval2}.toString(); - ${patternClass} $pattern = ${patternClass}.compile($escapeFunc(rightStr)); + String $rightStr = ${eval2}.toString(); + ${patternClass} $pattern = ${patternClass}.compile($escapeFunc($rightStr)); ${ev.value} = $pattern.matcher(${eval1}.toString()).matches(); """ }) @@ -152,10 +153,11 @@ case class RLike(left: Expression, right: Expression) """ } } else { + val rightStr = ctx.freshName("rightStr") nullSafeCodeGen(ctx, ev, (eval1, eval2) => { s""" - String rightStr = ${eval2}.toString(); - ${patternClass} $pattern = ${patternClass}.compile(rightStr); + String $rightStr = ${eval2}.toString(); + ${patternClass} $pattern = ${patternClass}.compile($rightStr); ${ev.value} = $pattern.matcher(${eval1}.toString()).find(0); """ }) @@ -248,6 +250,8 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio val classNamePattern = classOf[Pattern].getCanonicalName val classNameStringBuffer = classOf[java.lang.StringBuffer].getCanonicalName +val matcher = ctx.freshName("matcher") + ctx.addMutableState("UTF8String", termLastRegex, s"${termLastRegex} = null;") ctx.addMutableState(classNamePattern, termPattern, s"${termPattern} = null;") ctx.addMutableState("String", termLastReplacement, s"${termLastReplacement} = null;") @@ -256,6 +260,12 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio ctx.addMutableState(classNameStringBuffer, termResult, s"${termResult} = new $classNameStringBuffer();") +val setEvNotNull = if (nullable) { + s"${ev.isNull} = false;" +} else { + "" +} + nullSafeCodeGen(ctx, ev, (subject, regexp, rep) => { s""" if (!$regexp.equals(${termLastRegex})) { @@ -269,14 +279,14 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio ${termLastReplacement} = ${termLastReplacementInUTF8}.toString(); } ${termResult}.delete(0, ${termResult}.length()); - java.util.regex.Matcher m = ${termPattern}.matcher($subject.toString()); + java.util.regex.Matcher ${matcher} = ${termPattern}.matcher($subject.toString()); - while (m.find()) { -m.appendReplacement(${termResult},
spark git commit: [SPARK-16514][SQL] Fix various regex codegen bugs
Repository: spark Updated Branches: refs/heads/branch-2.0 4303d292b -> 41df62c59 [SPARK-16514][SQL] Fix various regex codegen bugs ## What changes were proposed in this pull request? RegexExtract and RegexReplace currently crash on non-nullable input due use of a hard-coded local variable name (e.g. compiles fail with `java.lang.Exception: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 85, Column 26: Redefinition of local variable "m" `). This changes those variables to use fresh names, and also in a few other places. ## How was this patch tested? Unit tests. rxin Author: Eric LiangCloses #14168 from ericl/sc-3906. (cherry picked from commit 1c58fa905b6543d366d00b2e5394dfd633987f6d) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/41df62c5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/41df62c5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/41df62c5 Branch: refs/heads/branch-2.0 Commit: 41df62c595474d7afda6dbe76a558d8cb3be7ff2 Parents: 4303d29 Author: Eric Liang Authored: Tue Jul 12 23:09:02 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 12 23:09:08 2016 -0700 -- .../expressions/regexpExpressions.scala | 48 ++-- .../expressions/StringExpressionsSuite.scala| 6 +++ 2 files changed, 39 insertions(+), 15 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/41df62c5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 541b860..be82b3b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -108,10 +108,11 @@ case class Like(left: Expression, right: Expression) """) } } else { + val rightStr = ctx.freshName("rightStr") nullSafeCodeGen(ctx, ev, (eval1, eval2) => { s""" - String rightStr = ${eval2}.toString(); - ${patternClass} $pattern = ${patternClass}.compile($escapeFunc(rightStr)); + String $rightStr = ${eval2}.toString(); + ${patternClass} $pattern = ${patternClass}.compile($escapeFunc($rightStr)); ${ev.value} = $pattern.matcher(${eval1}.toString()).matches(); """ }) @@ -157,10 +158,11 @@ case class RLike(left: Expression, right: Expression) """) } } else { + val rightStr = ctx.freshName("rightStr") nullSafeCodeGen(ctx, ev, (eval1, eval2) => { s""" - String rightStr = ${eval2}.toString(); - ${patternClass} $pattern = ${patternClass}.compile(rightStr); + String $rightStr = ${eval2}.toString(); + ${patternClass} $pattern = ${patternClass}.compile($rightStr); ${ev.value} = $pattern.matcher(${eval1}.toString()).find(0); """ }) @@ -259,6 +261,8 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio val classNamePattern = classOf[Pattern].getCanonicalName val classNameStringBuffer = classOf[java.lang.StringBuffer].getCanonicalName +val matcher = ctx.freshName("matcher") + ctx.addMutableState("UTF8String", termLastRegex, s"${termLastRegex} = null;") ctx.addMutableState(classNamePattern, termPattern, s"${termPattern} = null;") ctx.addMutableState("String", termLastReplacement, s"${termLastReplacement} = null;") @@ -267,6 +271,12 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio ctx.addMutableState(classNameStringBuffer, termResult, s"${termResult} = new $classNameStringBuffer();") +val setEvNotNull = if (nullable) { + s"${ev.isNull} = false;" +} else { + "" +} + nullSafeCodeGen(ctx, ev, (subject, regexp, rep) => { s""" if (!$regexp.equals(${termLastRegex})) { @@ -280,14 +290,14 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio ${termLastReplacement} = ${termLastReplacementInUTF8}.toString(); } ${termResult}.delete(0, ${termResult}.length()); - java.util.regex.Matcher m = ${termPattern}.matcher($subject.toString()); + java.util.regex.Matcher ${matcher} = ${termPattern}.matcher($subject.toString()); - while (m.find()) { -m.appendReplacement(${termResult},