sha174n commented on code in PR #39301:
URL: https://github.com/apache/superset/pull/39301#discussion_r3293224250
##########
superset/commands/dataset/importers/v1/utils.py:
##########
@@ -34,9 +37,32 @@
from superset.sql.parse import Table
from superset.utils import json
from superset.utils.core import get_user
+from superset.utils.network import is_safe_host
logger = logging.getLogger(__name__)
+
+class _ValidatingRedirectHandler(HTTPRedirectHandler):
+ """Re-validates the redirect target URL before following any HTTP redirect.
+
+ Prevents bypasses where an initial URL passes validation but a subsequent
+ redirect points to a disallowed destination.
+ """
+
+ def redirect_request(
+ self,
+ req: request.Request,
+ fp: Any,
+ code: int,
+ msg: str,
+ headers: Any,
+ newurl: str,
+ ) -> request.Request | None:
+ """Validate each redirect target before delegating to the parent
handler."""
+ validate_data_uri(newurl)
+ return super().redirect_request(req, fp, code, msg, headers, newurl)
Review Comment:
Already addressed: the redirect handler resolves `newurl` against
`req.full_url` via `urljoin` before validating.
##########
superset/commands/dataset/importers/v1/utils.py:
##########
@@ -82,12 +108,27 @@ def get_dtype(df: pd.DataFrame, dataset: SqlaTable) ->
dict[str, VisitableType]:
def validate_data_uri(data_uri: str) -> None:
"""
- Validate that the data URI is configured on DATASET_IMPORT_ALLOWED_URLS
- has a valid URL.
+ Validate that the data URI is permitted for dataset import.
+
+ Local ``file://`` URIs are allowed only when the path is confined to the
+ bundled examples folder. All other URIs must match a pattern in
+ ``DATASET_IMPORT_ALLOWED_DATA_URLS`` *and* resolve to a publicly-routable
host.
- :param data_uri:
- :return:
+ :param data_uri: the URI to validate
+ :raises DatasetForbiddenDataURI: if the URI is not permitted
"""
+ if data_uri.startswith("file://"):
+ from superset.examples.helpers import get_examples_folder
+
+ # Strip the "file://" prefix to get the filesystem path.
+ file_path = data_uri[len("file://") :]
+ # Resolve symlinks and relative components before comparing.
+ real_path = os.path.realpath(file_path)
+ examples_folder = os.path.realpath(get_examples_folder())
+ if not real_path.startswith(examples_folder + os.sep):
+ raise DatasetForbiddenDataURI()
+ return
Review Comment:
Already addressed: `file://` URIs are now parsed via `urlparse` +
`url2pathname`, and non-local netlocs (anything other than empty or
`localhost`) are explicitly rejected.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]