This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 258746f7f89 [SPARK-42622][CORE] Disable substitution in values
258746f7f89 is described below

commit 258746f7f89352266e4ac367e29aac9fe542dd15
Author: Jelmer Kuperus <jel...@miro.com>
AuthorDate: Thu Mar 2 08:43:32 2023 -0600

    [SPARK-42622][CORE] Disable substitution in values
    
    https://issues.apache.org/jira/browse/SPARK-42622
    
    ### What changes were proposed in this pull request?
    
    Disable substitution in values for the `StringSubstitutor` used to resolve 
error messages
    
    ### Why are the changes needed?
    
    when constructing an error message `ErrorClasssesJSONReader`
    
    1. reads the error message from 
core/src/main/resources/error/error-classes.json
    2. replaces `<fieldValue>` with `${fieldValue}` in the error message it read
    3. uses `StringSubstitutor` to replace `${fieldValue}` with the actual value
    
    If `fieldValue` is defined as `"${foo}"` then it will also try and resolve 
foo.
    When foo is undefined it will throw an IllegalArgumentException
    
    This is very problematic instance in this scenario. Where parsing json will 
fail if it does not adhere to the correct schema
    
    
![image](https://user-images.githubusercontent.com/133639/221866500-99f187a0-8db3-42a7-85ca-b027fdec160d.png)
    
    Here the error message produced will be something like
    
    `Cannot parse the field name properties and the value ${foo} of the JSON 
token type string to target Spark data type struct.`
    
    And because foo is undefined another error will be triggered, and another, 
and another.. until you have a stackoverflow.
    It could be employed as a denial of service attack on data pipelines
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    
    Locally
    
    Before
    
    
![image](https://user-images.githubusercontent.com/133639/221988445-9e751898-1038-40c0-96c6-68881d326a36.png)
    
    After
    
    
![image](https://user-images.githubusercontent.com/133639/221988511-3ae586f2-4c96-44b4-a798-88573350a4ed.png)
    
    Closes #40219 from jelmerk/jelmer/no_string_substitutor.
    
    Authored-by: Jelmer Kuperus <jel...@miro.com>
    Signed-off-by: Sean Owen <sro...@gmail.com>
---
 .../main/scala/org/apache/spark/ErrorClassesJSONReader.scala  |  1 +
 .../src/test/scala/org/apache/spark/SparkThrowableSuite.scala | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/core/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala 
b/core/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala
index 34dd13addff..ca7b9cc1bd6 100644
--- a/core/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala
+++ b/core/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala
@@ -47,6 +47,7 @@ class ErrorClassesJsonReader(jsonFileURLs: Seq[URL]) {
     val messageTemplate = getMessageTemplate(errorClass)
     val sub = new StringSubstitutor(messageParameters.asJava)
     sub.setEnableUndefinedVariableException(true)
+    sub.setDisableSubstitutionInValues(true)
     try {
       sub.replace(messageTemplate.replaceAll("<([a-zA-Z0-9_-]+)>", 
"\\$\\{$1\\}"))
     } catch {
diff --git a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala 
b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala
index c20c287c564..a8c56cf1460 100644
--- a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala
@@ -210,6 +210,17 @@ class SparkThrowableSuite extends SparkFunSuite {
     )
   }
 
+  test("Error message does not do substitution on values") {
+    assert(
+      getMessage(
+        "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+        Map("objectName" -> "`foo`", "proposal" -> "`${bar}`, `baz`")
+      ) ==
+        "[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter 
with " +
+          "name `foo` cannot be resolved. Did you mean one of the following? 
[`${bar}`, `baz`]."
+    )
+  }
+
   test("Try catching legacy SparkError") {
     try {
       throw new SparkException("Arbitrary legacy message")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to