This is an automated email from the ASF dual-hosted git repository. kaxilnaik pushed a commit to branch v2-0-test in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/v2-0-test by this push: new 2e76ed1 Simplify cleaning string passed to origin param (#14738) (#14905) 2e76ed1 is described below commit 2e76ed13c343b25c548307d36e1febcbaee0eae5 Author: Kaxil Naik <kaxiln...@gmail.com> AuthorDate: Sat Mar 20 00:39:21 2021 +0000 Simplify cleaning string passed to origin param (#14738) (#14905) Looks like the "trying to be smart" approach in https://github.com/apache/airflow/pull/14738 does not work on old Python versions. The "smart" part was that, if a semicolon existed in the URL, only those specific query arguments were removed. While this solved the issue for Python 3.6.13, it didn't fix it for 3.6.12 (although it minimized it). Python 3.6.12: ```python >>> parse_qsl("r=3;a=b") [('r', '3'), ('a', 'b')] ``` Python 3.6.13: ```python >>> parse_qsl("r=3;a=b") [('r', '3;a=b')] ``` This commit simplifies it: if the URL contains `;`, it just redirects to `/home`. (cherry picked from commit 178dee9a5ed0cde3d7a7d4a47daeae85408fcd67) --- airflow/www/views.py | 10 +++------- tests/www/test_views.py | 8 ++++---- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/airflow/www/views.py b/airflow/www/views.py index 6d19208..e768464 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -129,18 +129,14 @@ def get_safe_url(url): parsed = urlparse(url) - # If the url is relative & it contains semicolon, redirect it to homepage to avoid + # If the url contains semicolon, redirect it to homepage to avoid # potential XSS.
(Similar to https://github.com/python/cpython/pull/24297/files (bpo-42967)) - if parsed.netloc == '' and parsed.scheme == '' and ';' in unquote(url): + if ';' in unquote(url): return url_for('Airflow.index') query = parse_qsl(parsed.query, keep_blank_values=True) - # Remove all the query elements containing semicolon - # As part of https://github.com/python/cpython/pull/24297/files (bpo-42967) - # semicolon was already removed as a separator for query arguments by default - sanitized_query = [query_arg for query_arg in query if ';' not in query_arg[1]] - url = parsed._replace(query=urlencode(sanitized_query)).geturl() + url = parsed._replace(query=urlencode(query)).geturl() if parsed.scheme in valid_schemes and parsed.netloc in valid_netlocs: return url diff --git a/tests/www/test_views.py b/tests/www/test_views.py index f67c478..3adf64a 100644 --- a/tests/www/test_views.py +++ b/tests/www/test_views.py @@ -3299,15 +3299,15 @@ class TestHelperFunctions(TestBase): ("36539'%3balert(1)%2f%2f166", "/home"), ( "http://localhost:8080/trigger?dag_id=test&origin=36539%27%3balert(1)%2f%2f166&abc=2", - "http://localhost:8080/trigger?dag_id=test&abc=2", + "/home", ), ( "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%test_dag';alert(33)//", - "http://localhost:8080/trigger?dag_id=test_dag", + "/home", ), ( - "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%test_dag", - "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%25test_dag", + "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%3Dtest_dag", + "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%3Dtest_dag", ), ] )