This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 43c63337f98 [SPARK-34659][UI] Forbid using keyword "proxy" or "history" in reverse proxy URL 43c63337f98 is described below commit 43c63337f98097b046b70fcfb4fae44a3295b513 Author: Gengliang Wang <gengli...@apache.org> AuthorDate: Wed Apr 13 10:50:52 2022 -0700 [SPARK-34659][UI] Forbid using keyword "proxy" or "history" in reverse proxy URL ### What changes were proposed in this pull request? When the reverse proxy URL contains "proxy" or "history", the application ID in UI is wrongly parsed. For example, if we set spark.ui.reverseProxyURL as "/test/proxy/prefix" or "/test/history/prefix", the application ID is parsed as "prefix" and the related API calls will fail in stages/executors pages: ``` .../api/v1/applications/prefix/allexecutors ``` instead of ``` .../api/v1/applications/app-20220413142241-0000/allexecutors ``` There are more contexts in https://github.com/apache/spark/pull/31774 We can fix this entirely like https://github.com/apache/spark/pull/36174, but it is risky and complicated to do that. ### Why are the changes needed? Avoid users setting keywords in reverse proxy URL and getting wrong UI results. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? A new unit test. Also doc preview: <img width="1743" alt="image" src="https://user-images.githubusercontent.com/1097932/163126641-da315012-aae5-45a5-a048-340a5dd6e91e.png"> Closes #36176 from gengliangwang/forbidURLPrefix. Authored-by: Gengliang Wang <gengli...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- core/src/main/scala/org/apache/spark/SparkContext.scala | 5 ++--- core/src/main/scala/org/apache/spark/internal/config/UI.scala | 5 +++++ core/src/test/scala/org/apache/spark/SparkContextSuite.scala | 11 +++++++++++ docs/configuration.md | 4 +++- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 7257371256d..c6cb5cb5e19 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -592,9 +592,8 @@ class SparkContext(config: SparkConf) extends Logging { _env.blockManager.blockStoreClient.setAppAttemptId(attemptId) } if (_conf.get(UI_REVERSE_PROXY)) { - val proxyUrl = _conf.get(UI_REVERSE_PROXY_URL.key, "").stripSuffix("/") + - "/proxy/" + _applicationId - System.setProperty("spark.ui.proxyBase", proxyUrl) + val proxyUrl = _conf.get(UI_REVERSE_PROXY_URL).getOrElse("").stripSuffix("/") + System.setProperty("spark.ui.proxyBase", proxyUrl + "/proxy/" + _applicationId) } _ui.foreach(_.setAppId(_applicationId)) _env.blockManager.initialize(_applicationId) diff --git a/core/src/main/scala/org/apache/spark/internal/config/UI.scala b/core/src/main/scala/org/apache/spark/internal/config/UI.scala index 1790e97b35a..464034b8fcd 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/UI.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/UI.scala @@ -79,6 +79,11 @@ private[spark] object UI { "reach your proxy.") .version("2.1.0") .stringConf + .checkValue ({ s => + val words = s.split("/") + !words.contains("proxy") && !words.contains("history") }, + "Cannot use the keyword 'proxy' or 'history' in reverse proxy URL. Spark UI relies on both " + + "keywords for getting REST API endpoints from URIs.") .createOptional val UI_KILL_ENABLED = ConfigBuilder("spark.ui.killEnabled") diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala index 8671180b3ca..c64a4371911 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala @@ -1343,6 +1343,17 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu assert(env.blockManager.blockStoreClient.getAppAttemptId.equals("1")) } + test("SPARK-34659: check invalid UI_REVERSE_PROXY_URL") { + val reverseProxyUrl = "http://proxyhost:8080/path/proxy/spark" + val conf = new SparkConf().setAppName("testAppAttemptId") + .setMaster("pushbasedshuffleclustermanager") + conf.set(UI_REVERSE_PROXY, true) + conf.set(UI_REVERSE_PROXY_URL, reverseProxyUrl) + val msg = intercept[java.lang.IllegalArgumentException] { + new SparkContext(conf) + }.getMessage + assert(msg.contains("Cannot use the keyword 'proxy' or 'history' in reverse proxy URL")) + } } object SparkContextSuite { diff --git a/docs/configuration.md b/docs/configuration.md index 4fa37792a33..c0f3de86f98 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1324,7 +1324,9 @@ Apart from these, the following properties are also available, and may be useful This setting affects all the workers and application UIs running in the cluster and must be set identically on all the workers, drivers and masters. In is only effective when <code>spark.ui.reverseProxy</code> is turned on. This setting is not needed when the Spark - master web UI is directly reachable. </td> + master web UI is directly reachable.<br/> + Note that the value of the setting can't contain the keyword `proxy` or `history` after split by "/". Spark UI relies on both keywords for getting REST API endpoints from URIs. + </td> <td>2.1.0</td> </tr> <tr> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org