Repository: spark
Updated Branches:
  refs/heads/branch-2.0 670f48222 -> 110876b9a


[SPARK-15165] [SQL] Codegen can break because toCommentSafeString is not 
actually safe

## What changes were proposed in this pull request?

toCommentSafeString method replaces "\u" with "\\\\u" to avoid codegen breaking.
But if the even number of "\" is put before "u", like "\\\\u", in the string 
literal in the query, codegen can break.

Following code causes compilation error.

```
val df = Seq(...).toDF
df.select("'\\\\\\\\u002A/'").show
```

The reason of the compilation error is because "\\\\\\\\\\\\\\\\u002A/" is 
translated into "*/" (the end of comment).

Due to this unsafety, arbitrary code can be injected like as follows.

```
val df = Seq(...).toDF
// Inject "System.exit(1)"
df.select("'\\\\\\\\u002A/{System.exit(1);}/*'").show
```

## How was this patch tested?

Added new test cases.

Author: Kousuke Saruta <saru...@oss.nttdata.co.jp>
Author: sarutak <saru...@oss.nttdata.co.jp>

Closes #12939 from sarutak/SPARK-15165.

(cherry picked from commit c0c3ec35476c756e569a1f34c4b258eb0490585c)
Signed-off-by: Davies Liu <davies....@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/110876b9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/110876b9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/110876b9

Branch: refs/heads/branch-2.0
Commit: 110876b9afe5e4205062fd8e8979e096e585737d
Parents: 670f482
Author: Kousuke Saruta <saru...@oss.nttdata.co.jp>
Authored: Tue May 17 10:07:01 2016 -0700
Committer: Davies Liu <davies....@gmail.com>
Committed: Tue May 17 10:08:52 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/util/package.scala       |  13 +-
 .../expressions/CodeGenerationSuite.scala       |  44 ++++
 .../org/apache/spark/sql/SQLQuerySuite.scala    | 264 +++++++++++++++++++
 3 files changed, 320 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/110876b9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
index 3d2a624..f1d6cab 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
@@ -162,7 +162,18 @@ package object util {
   def toCommentSafeString(str: String): String = {
     val len = math.min(str.length, 128)
     val suffix = if (str.length > len) "..." else ""
-    str.substring(0, len).replace("*/", "\\*\\/").replace("\\u", "\\\\u") + 
suffix
+
+    // Unicode literals, like \u0022, should be escaped before
+    // they are put in code comment to avoid codegen breaking.
+    // To escape them, single "\" should be prepended to a series of "\" just 
before "u"
+    // only when the number of "\" is odd.
+    // For example, \u0022 should become to \\u0022
+    // but \\u0022 should not become to \\\u0022 because the first backslash 
escapes the second one,
+    // and \u0022 will remain, means not escaped.
+    // Otherwise, the runtime Java compiler will fail to compile or code 
injection can be allowed.
+    // For details, see SPARK-15165.
+    str.substring(0, len).replace("*/", "*\\/")
+      .replaceAll("(^|[^\\\\])(\\\\(\\\\\\\\)*u)", "$1\\\\$2") + suffix
   }
 
   /* FIX ME

http://git-wip-us.apache.org/repos/asf/spark/blob/110876b9/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
index 2082cea..db34d12 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
@@ -194,4 +194,48 @@ class CodeGenerationSuite extends SparkFunSuite with 
ExpressionEvalHelper {
       true,
       InternalRow(UTF8String.fromString("\\u")))
   }
+
+  test("check compilation error doesn't occur caused by specific literal") {
+    // The end of comment (*/) should be escaped.
+    GenerateUnsafeProjection.generate(
+      Literal.create("*/Compilation error occurs/*", StringType) :: Nil)
+
+    // `\u002A` is `*` and `\u002F` is `/`
+    // so if the end of comment consists of those characters in queries, we 
need to escape them.
+    GenerateUnsafeProjection.generate(
+      Literal.create("\\u002A/Compilation error occurs/*", StringType) :: Nil)
+    GenerateUnsafeProjection.generate(
+      Literal.create("\\\\u002A/Compilation error occurs/*", StringType) :: 
Nil)
+    GenerateUnsafeProjection.generate(
+      Literal.create("\\u002a/Compilation error occurs/*", StringType) :: Nil)
+    GenerateUnsafeProjection.generate(
+      Literal.create("\\\\u002a/Compilation error occurs/*", StringType) :: 
Nil)
+    GenerateUnsafeProjection.generate(
+      Literal.create("*\\u002FCompilation error occurs/*", StringType) :: Nil)
+    GenerateUnsafeProjection.generate(
+      Literal.create("*\\\\u002FCompilation error occurs/*", StringType) :: 
Nil)
+    GenerateUnsafeProjection.generate(
+      Literal.create("*\\002fCompilation error occurs/*", StringType) :: Nil)
+    GenerateUnsafeProjection.generate(
+      Literal.create("*\\\\002fCompilation error occurs/*", StringType) :: Nil)
+    GenerateUnsafeProjection.generate(
+      Literal.create("\\002A\\002FCompilation error occurs/*", StringType) :: 
Nil)
+    GenerateUnsafeProjection.generate(
+      Literal.create("\\\\002A\\002FCompilation error occurs/*", StringType) 
:: Nil)
+    GenerateUnsafeProjection.generate(
+      Literal.create("\\002A\\\\002FCompilation error occurs/*", StringType) 
:: Nil)
+
+    // \ u002X is an invalid unicode literal so it should be escaped.
+    GenerateUnsafeProjection.generate(
+      Literal.create("\\u002X/Compilation error occurs", StringType) :: Nil)
+    GenerateUnsafeProjection.generate(
+      Literal.create("\\\\u002X/Compilation error occurs", StringType) :: Nil)
+
+    // \ u001 is an invalid unicode literal so it should be escaped.
+    GenerateUnsafeProjection.generate(
+      Literal.create("\\u001/Compilation error occurs", StringType) :: Nil)
+    GenerateUnsafeProjection.generate(
+      Literal.create("\\\\u001/Compilation error occurs", StringType) :: Nil)
+
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/110876b9/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 7020841..b67e2bd 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2496,4 +2496,268 @@ class SQLQuerySuite extends QueryTest with 
SharedSQLContext {
     }
   }
 
+  test("check code injection is prevented") {
+    // The end of comment (*/) should be escaped.
+    var literal =
+      """|*/
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    var expected =
+      """|*/
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    checkAnswer(
+      sql(s"SELECT '$literal' AS DUMMY"),
+      Row(s"$expected") :: Nil)
+
+    // `\u002A` is `*` and `\u002F` is `/`
+    // so if the end of comment consists of those characters in queries, we 
need to escape them.
+    literal =
+      """|\\u002A/
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    expected =
+      s"""|${"\\u002A/"}
+          |{
+          |  new Object() {
+          |    void f() { throw new RuntimeException("This exception is 
injected."); }
+          |  }.f();
+          |}
+          |/*""".stripMargin
+    checkAnswer(
+      sql(s"SELECT '$literal' AS DUMMY"),
+      Row(s"$expected") :: Nil)
+
+    literal =
+      """|\\\\u002A/
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    expected =
+      """|\\u002A/
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    checkAnswer(
+      sql(s"SELECT '$literal' AS DUMMY"),
+      Row(s"$expected") :: Nil)
+
+    literal =
+      """|\\u002a/
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    expected =
+      s"""|${"\\u002a/"}
+          |{
+          |  new Object() {
+          |    void f() { throw new RuntimeException("This exception is 
injected."); }
+          |  }.f();
+          |}
+          |/*""".stripMargin
+    checkAnswer(
+      sql(s"SELECT '$literal' AS DUMMY"),
+      Row(s"$expected") :: Nil)
+
+    literal =
+      """|\\\\u002a/
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    expected =
+      """|\\u002a/
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    checkAnswer(
+      sql(s"SELECT '$literal' AS DUMMY"),
+      Row(s"$expected") :: Nil)
+
+    literal =
+      """|*\\u002F
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    expected =
+      s"""|${"*\\u002F"}
+          |{
+          |  new Object() {
+          |    void f() { throw new RuntimeException("This exception is 
injected."); }
+          |  }.f();
+          |}
+          |/*""".stripMargin
+    checkAnswer(
+      sql(s"SELECT '$literal' AS DUMMY"),
+      Row(s"$expected") :: Nil)
+
+    literal =
+      """|*\\\\u002F
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    expected =
+      """|*\\u002F
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    checkAnswer(
+      sql(s"SELECT '$literal' AS DUMMY"),
+      Row(s"$expected") :: Nil)
+
+    literal =
+      """|*\\u002f
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    expected =
+      s"""|${"*\\u002f"}
+          |{
+          |  new Object() {
+          |    void f() { throw new RuntimeException("This exception is 
injected."); }
+          |  }.f();
+          |}
+          |/*""".stripMargin
+    checkAnswer(
+      sql(s"SELECT '$literal' AS DUMMY"),
+      Row(s"$expected") :: Nil)
+
+    literal =
+      """|*\\\\u002f
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    expected =
+      """|*\\u002f
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    checkAnswer(
+      sql(s"SELECT '$literal' AS DUMMY"),
+      Row(s"$expected") :: Nil)
+
+    literal =
+      """|\\u002A\\u002F
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    expected =
+      s"""|${"\\u002A\\u002F"}
+          |{
+          |  new Object() {
+          |    void f() { throw new RuntimeException("This exception is 
injected."); }
+          |  }.f();
+          |}
+          |/*""".stripMargin
+    checkAnswer(
+      sql(s"SELECT '$literal' AS DUMMY"),
+      Row(s"$expected") :: Nil)
+
+    literal =
+      """|\\\\u002A\\u002F
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    expected =
+      s"""|${"\\\\u002A\\u002F"}
+          |{
+          |  new Object() {
+          |    void f() { throw new RuntimeException("This exception is 
injected."); }
+          |  }.f();
+          |}
+          |/*""".stripMargin
+    checkAnswer(
+      sql(s"SELECT '$literal' AS DUMMY"),
+      Row(s"$expected") :: Nil)
+
+    literal =
+      """|\\u002A\\\\u002F
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    expected =
+      s"""|${"\\u002A\\\\u002F"}
+          |{
+          |  new Object() {
+          |    void f() { throw new RuntimeException("This exception is 
injected."); }
+          |  }.f();
+          |}
+          |/*""".stripMargin
+    checkAnswer(
+      sql(s"SELECT '$literal' AS DUMMY"),
+      Row(s"$expected") :: Nil)
+
+    literal =
+      """|\\\\u002A\\\\u002F
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    expected =
+      """|\\u002A\\u002F
+         |{
+         |  new Object() {
+         |    void f() { throw new RuntimeException("This exception is 
injected."); }
+         |  }.f();
+         |}
+         |/*""".stripMargin
+    checkAnswer(
+      sql(s"SELECT '$literal' AS DUMMY"),
+      Row(s"$expected") :: Nil)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to