This is an automated email from the ASF dual-hosted git repository. kaxilnaik pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/master by this push: new 178dee9 Simplify cleaning string passed to origin param (#14738) (#14905) 178dee9 is described below commit 178dee9a5ed0cde3d7a7d4a47daeae85408fcd67 Author: Kaxil Naik <kaxiln...@gmail.com> AuthorDate: Sat Mar 20 00:39:21 2021 +0000 Simplify cleaning string passed to origin param (#14738) (#14905) Looks like the "trying to be smart" approach in https://github.com/apache/airflow/pull/14738 does not work on old Python versions. The "smart" part was that, if a semicolon existed in the URL, only the query arguments containing it were removed. While this solved the issue for Python 3.6.13, it didn't fix it for 3.6.12 (although it minimized it). Python 3.6.12: ```python >>> parse_qsl("r=3;a=b") [('r', '3'), ('a', 'b')] ``` Python 3.6.13: ```python >>> parse_qsl("r=3;a=b") [('r', '3;a=b')] ``` This commit simplifies the logic: if the URL contains `;`, it just redirects to `/home`. --- airflow/www/views.py | 10 +++------- tests/www/test_views.py | 8 ++++---- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/airflow/www/views.py b/airflow/www/views.py index f31ccdc..633cf7c 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -129,18 +129,14 @@ def get_safe_url(url): parsed = urlparse(url) - # If the url is relative & it contains semicolon, redirect it to homepage to avoid + # If the url contains semicolon, redirect it to homepage to avoid # potential XSS. 
(Similar to https://github.com/python/cpython/pull/24297/files (bpo-42967)) - if parsed.netloc == '' and parsed.scheme == '' and ';' in unquote(url): + if ';' in unquote(url): return url_for('Airflow.index') query = parse_qsl(parsed.query, keep_blank_values=True) - # Remove all the query elements containing semicolon - # As part of https://github.com/python/cpython/pull/24297/files (bpo-42967) - # semicolon was already removed as a separator for query arguments by default - sanitized_query = [query_arg for query_arg in query if ';' not in query_arg[1]] - url = parsed._replace(query=urlencode(sanitized_query)).geturl() + url = parsed._replace(query=urlencode(query)).geturl() if parsed.scheme in valid_schemes and parsed.netloc in valid_netlocs: return url diff --git a/tests/www/test_views.py b/tests/www/test_views.py index 48aaa01..2e61c03 100644 --- a/tests/www/test_views.py +++ b/tests/www/test_views.py @@ -3353,15 +3353,15 @@ class TestHelperFunctions(TestBase): ("36539'%3balert(1)%2f%2f166", "/home"), ( "http://localhost:8080/trigger?dag_id=test&origin=36539%27%3balert(1)%2f%2f166&abc=2", - "http://localhost:8080/trigger?dag_id=test&abc=2", + "/home", ), ( "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%test_dag';alert(33)//", - "http://localhost:8080/trigger?dag_id=test_dag", + "/home", ), ( - "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%test_dag", - "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%25test_dag", + "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%3Dtest_dag", + "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%3Dtest_dag", ), ] )