This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 43c63337f98 [SPARK-34659][UI] Forbid using keyword "proxy" or "history" in reverse proxy URL
43c63337f98 is described below

commit 43c63337f98097b046b70fcfb4fae44a3295b513
Author: Gengliang Wang <gengli...@apache.org>
AuthorDate: Wed Apr 13 10:50:52 2022 -0700

    [SPARK-34659][UI] Forbid using keyword "proxy" or "history" in reverse proxy URL
    
    ### What changes were proposed in this pull request?
    
    When the reverse proxy URL contains "proxy" or "history", the application ID in the UI is parsed incorrectly.
    For example, if we set spark.ui.reverseProxyUrl to "/test/proxy/prefix" or "/test/history/prefix", the application ID is parsed as "prefix" and the related API calls in the stages/executors pages fail with:
    ```
    .../api/v1/applications/prefix/allexecutors
    ```
    instead of
    ```
    .../api/v1/applications/app-20220413142241-0000/allexecutors
    ```
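
    For reference, the validation added by this patch reduces to a simple check on the "/"-separated segments of the configured URL. A standalone sketch of that predicate (plain Scala, no Spark dependencies; the function name is illustrative):
    ```
    // Reject a reverse proxy URL if any path segment is a reserved keyword.
    def isValidReverseProxyUrl(url: String): Boolean = {
      val segments = url.split("/")
      !segments.contains("proxy") && !segments.contains("history")
    }

    isValidReverseProxyUrl("/test/prefix")         // true  -- allowed
    isValidReverseProxyUrl("/test/proxy/prefix")   // false -- "proxy" is reserved
    isValidReverseProxyUrl("/test/history/prefix") // false -- "history" is reserved
    ```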
    
    There is more context in https://github.com/apache/spark/pull/31774.
    We could fix this entirely, as in https://github.com/apache/spark/pull/36174, but doing so is risky and complicated.
    
    ### Why are the changes needed?
    
    Prevent users from setting these keywords in the reverse proxy URL and getting incorrect UI results.
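
    With this change a misconfigured prefix fails fast instead of silently producing broken UI pages. A minimal sketch of what a user would now see (assumes a build that includes this patch; the app name and master are illustrative):
    ```
    import org.apache.spark.{SparkConf, SparkContext}

    val conf = new SparkConf()
      .setAppName("reverse-proxy-demo")
      .setMaster("local[2]")
      .set("spark.ui.reverseProxy", "true")
      .set("spark.ui.reverseProxyUrl", "/test/proxy/prefix") // contains the reserved segment "proxy"

    // Constructing the context now throws IllegalArgumentException with a message
    // containing "Cannot use the keyword 'proxy' or 'history' in reverse proxy URL".
    val sc = new SparkContext(conf)
    ```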
    
    ### Does this PR introduce _any_ user-facing change?
    
    No

    ### How was this patch tested?
    
    A new unit test.
    Also doc preview:
    <img width="1743" alt="image" 
src="https://user-images.githubusercontent.com/1097932/163126641-da315012-aae5-45a5-a048-340a5dd6e91e.png";>
    
    Closes #36176 from gengliangwang/forbidURLPrefix.
    
    Authored-by: Gengliang Wang <gengli...@apache.org>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 core/src/main/scala/org/apache/spark/SparkContext.scala       |  5 ++---
 core/src/main/scala/org/apache/spark/internal/config/UI.scala |  5 +++++
 core/src/test/scala/org/apache/spark/SparkContextSuite.scala  | 11 +++++++++++
 docs/configuration.md                                         |  4 +++-
 4 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 7257371256d..c6cb5cb5e19 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -592,9 +592,8 @@ class SparkContext(config: SparkConf) extends Logging {
       _env.blockManager.blockStoreClient.setAppAttemptId(attemptId)
     }
     if (_conf.get(UI_REVERSE_PROXY)) {
-      val proxyUrl = _conf.get(UI_REVERSE_PROXY_URL.key, "").stripSuffix("/") +
-        "/proxy/" + _applicationId
-      System.setProperty("spark.ui.proxyBase", proxyUrl)
+      val proxyUrl = _conf.get(UI_REVERSE_PROXY_URL).getOrElse("").stripSuffix("/")
+      System.setProperty("spark.ui.proxyBase", proxyUrl + "/proxy/" + _applicationId)
     }
     _ui.foreach(_.setAppId(_applicationId))
     _env.blockManager.initialize(_applicationId)
diff --git a/core/src/main/scala/org/apache/spark/internal/config/UI.scala b/core/src/main/scala/org/apache/spark/internal/config/UI.scala
index 1790e97b35a..464034b8fcd 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/UI.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/UI.scala
@@ -79,6 +79,11 @@ private[spark] object UI {
       "reach your proxy.")
     .version("2.1.0")
     .stringConf
+    .checkValue ({ s =>
+      val words = s.split("/")
+      !words.contains("proxy") && !words.contains("history") },
+      "Cannot use the keyword 'proxy' or 'history' in reverse proxy URL. Spark 
UI relies on both " +
+        "keywords for getting REST API endpoints from URIs.")
     .createOptional
 
   val UI_KILL_ENABLED = ConfigBuilder("spark.ui.killEnabled")
diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
index 8671180b3ca..c64a4371911 100644
--- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
@@ -1343,6 +1343,17 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu
     assert(env.blockManager.blockStoreClient.getAppAttemptId.equals("1"))
   }
 
+  test("SPARK-34659: check invalid UI_REVERSE_PROXY_URL") {
+    val reverseProxyUrl = "http://proxyhost:8080/path/proxy/spark"
+    val conf = new SparkConf().setAppName("testAppAttemptId")
+      .setMaster("pushbasedshuffleclustermanager")
+    conf.set(UI_REVERSE_PROXY, true)
+    conf.set(UI_REVERSE_PROXY_URL, reverseProxyUrl)
+    val msg = intercept[java.lang.IllegalArgumentException] {
+      new SparkContext(conf)
+    }.getMessage
+    assert(msg.contains("Cannot use the keyword 'proxy' or 'history' in reverse proxy URL"))
+  }
 }
 
 object SparkContextSuite {
diff --git a/docs/configuration.md b/docs/configuration.md
index 4fa37792a33..c0f3de86f98 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1324,7 +1324,9 @@ Apart from these, the following properties are also available, and may be useful
     This setting affects all the workers and application UIs running in the cluster and must be set
     identically on all the workers, drivers and masters. In is only effective when
     <code>spark.ui.reverseProxy</code> is turned on. This setting is not needed when the Spark
-    master web UI is directly reachable.  </td>
+    master web UI is directly reachable.<br/>
+    Note that the value of the setting can't contain the keyword `proxy` or `history` after split by "/". Spark UI relies on both keywords for getting REST API endpoints from URIs.
+  </td>
   <td>2.1.0</td>
 </tr>
 <tr>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
