spark git commit: [SPARK-16514][SQL] Fix various regex codegen bugs

2016-07-13 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 702178d1f -> 9808735e0


[SPARK-16514][SQL] Fix various regex codegen bugs

## What changes were proposed in this pull request?

RegexExtract and RegexReplace currently crash on non-nullable input due use of 
a hard-coded local variable name (e.g. compiles fail with `java.lang.Exception: 
failed to compile: org.codehaus.commons.compiler.CompileException: File 
'generated.java', Line 85, Column 26: Redefinition of local variable "m" `).

This changes those variables to use fresh names, and also in a few other places.

## How was this patch tested?

Unit tests. rxin

Author: Eric Liang 

Closes #14168 from ericl/sc-3906.

(cherry picked from commit 1c58fa905b6543d366d00b2e5394dfd633987f6d)
Signed-off-by: Reynold Xin 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9808735e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9808735e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9808735e

Branch: refs/heads/branch-1.6
Commit: 9808735e0ce91c68df4c1ce82c44543995d44aed
Parents: 702178d
Author: Eric Liang 
Authored: Tue Jul 12 23:09:02 2016 -0700
Committer: Reynold Xin 
Committed: Tue Jul 12 23:09:31 2016 -0700

--
 .../expressions/regexpExpressions.scala | 48 ++--
 .../expressions/StringExpressionsSuite.scala|  6 +++
 2 files changed, 39 insertions(+), 15 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9808735e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 9e484c5..154c7a0 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -104,10 +104,11 @@ case class Like(left: Expression, right: Expression)
 """
   }
 } else {
+  val rightStr = ctx.freshName("rightStr")
   nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
 s"""
-  String rightStr = ${eval2}.toString();
-  ${patternClass} $pattern = 
${patternClass}.compile($escapeFunc(rightStr));
+  String $rightStr = ${eval2}.toString();
+  ${patternClass} $pattern = 
${patternClass}.compile($escapeFunc($rightStr));
   ${ev.value} = $pattern.matcher(${eval1}.toString()).matches();
 """
   })
@@ -152,10 +153,11 @@ case class RLike(left: Expression, right: Expression)
 """
   }
 } else {
+  val rightStr = ctx.freshName("rightStr")
   nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
 s"""
-  String rightStr = ${eval2}.toString();
-  ${patternClass} $pattern = ${patternClass}.compile(rightStr);
+  String $rightStr = ${eval2}.toString();
+  ${patternClass} $pattern = ${patternClass}.compile($rightStr);
   ${ev.value} = $pattern.matcher(${eval1}.toString()).find(0);
 """
   })
@@ -248,6 +250,8 @@ case class RegExpReplace(subject: Expression, regexp: 
Expression, rep: Expressio
 val classNamePattern = classOf[Pattern].getCanonicalName
 val classNameStringBuffer = 
classOf[java.lang.StringBuffer].getCanonicalName
 
+val matcher = ctx.freshName("matcher")
+
 ctx.addMutableState("UTF8String", termLastRegex, s"${termLastRegex} = 
null;")
 ctx.addMutableState(classNamePattern, termPattern, s"${termPattern} = 
null;")
 ctx.addMutableState("String", termLastReplacement, 
s"${termLastReplacement} = null;")
@@ -256,6 +260,12 @@ case class RegExpReplace(subject: Expression, regexp: 
Expression, rep: Expressio
 ctx.addMutableState(classNameStringBuffer,
   termResult, s"${termResult} = new $classNameStringBuffer();")
 
+val setEvNotNull = if (nullable) {
+  s"${ev.isNull} = false;"
+} else {
+  ""
+}
+
 nullSafeCodeGen(ctx, ev, (subject, regexp, rep) => {
 s"""
   if (!$regexp.equals(${termLastRegex})) {
@@ -269,14 +279,14 @@ case class RegExpReplace(subject: Expression, regexp: 
Expression, rep: Expressio
 ${termLastReplacement} = ${termLastReplacementInUTF8}.toString();
   }
   ${termResult}.delete(0, ${termResult}.length());
-  java.util.regex.Matcher m = ${termPattern}.matcher($subject.toString());
+  java.util.regex.Matcher ${matcher} = 
${termPattern}.matcher($subject.toString());
 
-  while (m.find()) {
-m.appendReplacement(${termResult}, 

spark git commit: [SPARK-16514][SQL] Fix various regex codegen bugs

2016-07-13 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 4303d292b -> 41df62c59


[SPARK-16514][SQL] Fix various regex codegen bugs

## What changes were proposed in this pull request?

RegexExtract and RegexReplace currently crash on non-nullable input due use of 
a hard-coded local variable name (e.g. compiles fail with `java.lang.Exception: 
failed to compile: org.codehaus.commons.compiler.CompileException: File 
'generated.java', Line 85, Column 26: Redefinition of local variable "m" `).

This changes those variables to use fresh names, and also in a few other places.

## How was this patch tested?

Unit tests. rxin

Author: Eric Liang 

Closes #14168 from ericl/sc-3906.

(cherry picked from commit 1c58fa905b6543d366d00b2e5394dfd633987f6d)
Signed-off-by: Reynold Xin 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/41df62c5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/41df62c5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/41df62c5

Branch: refs/heads/branch-2.0
Commit: 41df62c595474d7afda6dbe76a558d8cb3be7ff2
Parents: 4303d29
Author: Eric Liang 
Authored: Tue Jul 12 23:09:02 2016 -0700
Committer: Reynold Xin 
Committed: Tue Jul 12 23:09:08 2016 -0700

--
 .../expressions/regexpExpressions.scala | 48 ++--
 .../expressions/StringExpressionsSuite.scala|  6 +++
 2 files changed, 39 insertions(+), 15 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/41df62c5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 541b860..be82b3b 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -108,10 +108,11 @@ case class Like(left: Expression, right: Expression)
 """)
   }
 } else {
+  val rightStr = ctx.freshName("rightStr")
   nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
 s"""
-  String rightStr = ${eval2}.toString();
-  ${patternClass} $pattern = 
${patternClass}.compile($escapeFunc(rightStr));
+  String $rightStr = ${eval2}.toString();
+  ${patternClass} $pattern = 
${patternClass}.compile($escapeFunc($rightStr));
   ${ev.value} = $pattern.matcher(${eval1}.toString()).matches();
 """
   })
@@ -157,10 +158,11 @@ case class RLike(left: Expression, right: Expression)
 """)
   }
 } else {
+  val rightStr = ctx.freshName("rightStr")
   nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
 s"""
-  String rightStr = ${eval2}.toString();
-  ${patternClass} $pattern = ${patternClass}.compile(rightStr);
+  String $rightStr = ${eval2}.toString();
+  ${patternClass} $pattern = ${patternClass}.compile($rightStr);
   ${ev.value} = $pattern.matcher(${eval1}.toString()).find(0);
 """
   })
@@ -259,6 +261,8 @@ case class RegExpReplace(subject: Expression, regexp: 
Expression, rep: Expressio
 val classNamePattern = classOf[Pattern].getCanonicalName
 val classNameStringBuffer = 
classOf[java.lang.StringBuffer].getCanonicalName
 
+val matcher = ctx.freshName("matcher")
+
 ctx.addMutableState("UTF8String", termLastRegex, s"${termLastRegex} = 
null;")
 ctx.addMutableState(classNamePattern, termPattern, s"${termPattern} = 
null;")
 ctx.addMutableState("String", termLastReplacement, 
s"${termLastReplacement} = null;")
@@ -267,6 +271,12 @@ case class RegExpReplace(subject: Expression, regexp: 
Expression, rep: Expressio
 ctx.addMutableState(classNameStringBuffer,
   termResult, s"${termResult} = new $classNameStringBuffer();")
 
+val setEvNotNull = if (nullable) {
+  s"${ev.isNull} = false;"
+} else {
+  ""
+}
+
 nullSafeCodeGen(ctx, ev, (subject, regexp, rep) => {
 s"""
   if (!$regexp.equals(${termLastRegex})) {
@@ -280,14 +290,14 @@ case class RegExpReplace(subject: Expression, regexp: 
Expression, rep: Expressio
 ${termLastReplacement} = ${termLastReplacementInUTF8}.toString();
   }
   ${termResult}.delete(0, ${termResult}.length());
-  java.util.regex.Matcher m = ${termPattern}.matcher($subject.toString());
+  java.util.regex.Matcher ${matcher} = 
${termPattern}.matcher($subject.toString());
 
-  while (m.find()) {
-m.appendReplacement(${termResult},