spark git commit: [SPARK-17160] Properly escape field names in code-generated error messages
Repository: spark Updated Branches: refs/heads/master d8104158a -> e719b1c04 [SPARK-17160] Properly escape field names in code-generated error messages This patch addresses a corner-case escaping bug where field names which contain special characters were unsafely interpolated into error message string literals in generated Java code, leading to compilation errors. This patch addresses these issues by using `addReferenceObj` to store the error messages as string fields rather than inline string constants. Author: Josh Rosen Closes #15156 from JoshRosen/SPARK-17160. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e719b1c0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e719b1c0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e719b1c0 Branch: refs/heads/master Commit: e719b1c045ba185d242d21bbfcdee2c84dafc587 Parents: d810415 Author: Josh Rosen Authored: Mon Sep 19 20:20:36 2016 -0700 Committer: Josh Rosen Committed: Mon Sep 19 20:20:36 2016 -0700 -- .../apache/spark/sql/catalyst/expressions/misc.scala | 12 +--- .../sql/catalyst/expressions/objects/objects.scala | 12 .../sql/catalyst/expressions/CodeGenerationSuite.scala | 13 - 3 files changed, 29 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e719b1c0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 3692075..92f8fb8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -477,10 +477,13 @@ case class PrintToStderr(child: Expression) extends UnaryExpression { protected override def nullSafeEval(input: Any): Any = input + private val outputPrefix = s"Result of ${child.simpleString} is " + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +val outputPrefixField = ctx.addReferenceObj("outputPrefix", outputPrefix) nullSafeCodeGen(ctx, ev, c => s""" - | System.err.println("Result of ${child.simpleString} is " + $c); + | System.err.println($outputPrefixField + $c); | ${ev.value} = $c; """.stripMargin) } @@ -501,10 +504,12 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa override def prettyName: String = "assert_true" + private val errMsg = s"'${child.simpleString}' is not true!" + override def eval(input: InternalRow) : Any = { val v = child.eval(input) if (v == null || java.lang.Boolean.FALSE.equals(v)) { - throw new RuntimeException(s"'${child.simpleString}' is not true!") + throw new RuntimeException(errMsg) } else { null } @@ -512,9 +517,10 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) +val errMsgField = ctx.addReferenceObj("errMsg", errMsg) ExprCode(code = s"""${eval.code} |if (${eval.isNull} || !${eval.value}) { - | throw new RuntimeException("'${child.simpleString}' is not true."); + | throw new RuntimeException($errMsgField); |}""".stripMargin, isNull = "true", value = "null") } http://git-wip-us.apache.org/repos/asf/spark/blob/e719b1c0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 4da74a0..faf8fec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -938,7 +938,10 @@ case class GetExternalRowField( override def eval(input: InternalRow): Any = throw new UnsupportedOperationException("Only code-generated evaluation is supported") + private val errMsg = s"The ${index}th field '$fieldName' of input row cannot be null." + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +val errMsgField = ctx.addReferenceObj("errMsg", errMsg) val row = child.genCode(ctx) val code = s""" ${row.code} @@ -948,8 +951,7 @@ case class GetExternalRowField( }
spark git commit: [SPARK-17160] Properly escape field names in code-generated error messages
Repository: spark Updated Branches: refs/heads/branch-2.0 c02bc926d -> 7026eb87e [SPARK-17160] Properly escape field names in code-generated error messages This patch addresses a corner-case escaping bug where field names which contain special characters were unsafely interpolated into error message string literals in generated Java code, leading to compilation errors. This patch addresses these issues by using `addReferenceObj` to store the error messages as string fields rather than inline string constants. Author: Josh Rosen Closes #15156 from JoshRosen/SPARK-17160. (cherry picked from commit e719b1c045ba185d242d21bbfcdee2c84dafc587) Signed-off-by: Josh Rosen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7026eb87 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7026eb87 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7026eb87 Branch: refs/heads/branch-2.0 Commit: 7026eb87e7d7799d2818334a2e191dc46987975f Parents: c02bc92 Author: Josh Rosen Authored: Mon Sep 19 20:20:36 2016 -0700 Committer: Josh Rosen Committed: Mon Sep 19 20:21:25 2016 -0700 -- .../apache/spark/sql/catalyst/expressions/misc.scala | 12 +--- .../sql/catalyst/expressions/objects/objects.scala | 12 .../sql/catalyst/expressions/CodeGenerationSuite.scala | 13 - 3 files changed, 29 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7026eb87/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 3692075..92f8fb8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -477,10 +477,13 @@ case class PrintToStderr(child: Expression) extends UnaryExpression { protected override def nullSafeEval(input: Any): Any = input + private val outputPrefix = s"Result of ${child.simpleString} is " + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +val outputPrefixField = ctx.addReferenceObj("outputPrefix", outputPrefix) nullSafeCodeGen(ctx, ev, c => s""" - | System.err.println("Result of ${child.simpleString} is " + $c); + | System.err.println($outputPrefixField + $c); | ${ev.value} = $c; """.stripMargin) } @@ -501,10 +504,12 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa override def prettyName: String = "assert_true" + private val errMsg = s"'${child.simpleString}' is not true!" + override def eval(input: InternalRow) : Any = { val v = child.eval(input) if (v == null || java.lang.Boolean.FALSE.equals(v)) { - throw new RuntimeException(s"'${child.simpleString}' is not true!") + throw new RuntimeException(errMsg) } else { null } @@ -512,9 +517,10 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) +val errMsgField = ctx.addReferenceObj("errMsg", errMsg) ExprCode(code = s"""${eval.code} |if (${eval.isNull} || !${eval.value}) { - | throw new RuntimeException("'${child.simpleString}' is not true."); + | throw new RuntimeException($errMsgField); |}""".stripMargin, isNull = "true", value = "null") } http://git-wip-us.apache.org/repos/asf/spark/blob/7026eb87/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 1cdda53..691edd5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -763,7 +763,10 @@ case class GetExternalRowField( override def eval(input: InternalRow): Any = throw new UnsupportedOperationException("Only code-generated evaluation is supported") + private val errMsg = s"The ${index}th field '$fieldName' of input row cannot be null." + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +val errMsgField = ctx.addReferenceObj("errMsg", errMsg) val row = child.genCode(ctx)