This is an automated email from the ASF dual-hosted git repository.

pierrejeambrun pushed a commit to branch v3-2-test
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/v3-2-test by this push:
     new b14148f41e8 [v3-2-test] Filter scheduling-dependencies graph edges by 
readable-DAG access (#67627) (#67907)
b14148f41e8 is described below

commit b14148f41e8c921cf4388b5fbdc02e661e5f400e
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Jun 3 15:50:43 2026 +0200

    [v3-2-test] Filter scheduling-dependencies graph edges by readable-DAG 
access (#67627) (#67907)
    
    The UI scheduling-dependencies graph route (`GET 
/ui/dependencies?dependency_type=scheduling`) was filtering only the top-level 
DAG keys in `get_scheduling_dependencies()` by the caller's readable-DAG set. 
For each dependency object under a readable top-level DAG, the route still 
emitted the dependency node (whose `node_id` embeds both source and target DAG 
ids — e.g. 
`trigger:external_trigger_dag_id:downstream:trigger_dag_run_operator`), the 
inbound edge from `dep.source` to `dep.node_id`, and the outbound edge from 
`dep.node_id` to `dep.target`.
    
    A caller with read access to one DAG would therefore see identifiers and 
edge metadata for other DAGs they cannot read whenever the readable DAG 
referenced those DAGs via trigger/sensor dependencies.
    
    This change extends the readable-DAG filter inside the dependency loop: 
when either `dep.source` or `dep.target` is a bare DAG id outside the caller's 
readable set, the dependency node and both its edges are skipped entirely. 
Asset-prefixed identifiers (`asset:<id>`) are unaffected by the new check.
    
    Reference: airflow-s/airflow-s#441
    (cherry picked from commit e61c9bac30a7b1a7f68589629f10c4cb247c683e)
    
    
    Generated-by: Claude Opus 4.7 (1M context) following the guidelines at 
https://github.com/apache/airflow/blob/main/contributing-docs/05_pull_requests.rst#gen-ai-assisted-contributions
    
    Co-authored-by: Jarek Potiuk <[email protected]>
---
 .../core_api/services/ui/dependencies.py           | 14 +++++++
 .../core_api/routes/ui/test_dependencies.py        | 44 ++++++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git 
a/airflow-core/src/airflow/api_fastapi/core_api/services/ui/dependencies.py 
b/airflow-core/src/airflow/api_fastapi/core_api/services/ui/dependencies.py
index ead14c68a2d..f125e22fce9 100644
--- a/airflow-core/src/airflow/api_fastapi/core_api/services/ui/dependencies.py
+++ b/airflow-core/src/airflow/api_fastapi/core_api/services/ui/dependencies.py
@@ -98,6 +98,20 @@ def get_scheduling_dependencies(readable_dag_ids: set[str] | 
None = None) -> dic
         dag_node_id = f"dag:{dag}"
         if dag_node_id not in nodes_dict:
             for dep in dependencies:
+                # Skip dependency objects whose edge endpoints reference DAGs
+                # outside the caller's readable set. ``dep.node_id`` /
+                # ``dep.source`` / ``dep.target`` would otherwise embed those
+                # DAG ids in the response even when the top-level filter
+                # above hides the DAG itself.
+                if readable_dag_ids is not None:
+                    referenced_dag_ids: set[str] = set()
+                    if dep.source != dep.dependency_type and ":" not in 
dep.source:
+                        referenced_dag_ids.add(dep.source)
+                    if dep.target != dep.dependency_type and ":" not in 
dep.target:
+                        referenced_dag_ids.add(dep.target)
+                    if not referenced_dag_ids.issubset(readable_dag_ids):
+                        continue
+
                 # Add nodes
                 nodes_dict[dag_node_id] = {"id": dag_node_id, "label": dag, 
"type": "dag"}
                 if dep.node_id not in nodes_dict:
diff --git 
a/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_dependencies.py 
b/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_dependencies.py
index 2ea701c9a6d..d02a66393bc 100644
--- 
a/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_dependencies.py
+++ 
b/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_dependencies.py
@@ -358,6 +358,50 @@ class TestGetDependencies:
         for node_id in expected_absent:
             assert node_id not in dag_node_ids
 
+    @mock.patch(
+        
"airflow.api_fastapi.auth.managers.base_auth_manager.BaseAuthManager.get_authorized_dag_ids",
+        return_value={"downstream"},
+    )
+    @pytest.mark.usefixtures("make_primary_connected_component")
+    def 
test_scheduling_dependencies_redacts_trigger_sensor_endpoints_referencing_unreadable_dags(
+        self, _, test_client, asset1_id
+    ):
+        """Trigger/sensor dependency objects under a readable top-level DAG 
must
+        not leak unreadable DAG identifiers through ``dep.node_id`` /
+        ``dep.source`` / ``dep.target``. The top-level filter only hides the
+        unreadable DAG as a top-level key; this regression check covers the
+        edge-endpoint leak."""
+        response = test_client.get("/dependencies")
+        assert response.status_code == 200
+
+        result = response.json()
+        unreadable_dag_ids = {"external_trigger_dag_id", "other_dag", 
"upstream"}
+
+        # No node id may contain any unreadable DAG identifier (covers the
+        # bare ``dag:`` nodes that the top-level filter already hid, plus
+        # the ``trigger:.../sensor:...`` nodes whose ids embed both endpoints).
+        for node in result["nodes"]:
+            for unreadable in unreadable_dag_ids:
+                assert unreadable not in node["id"], (
+                    f"node id {node['id']!r} leaks unreadable DAG 
{unreadable!r}"
+                )
+
+        # No edge endpoint may be a ``dag:<unreadable>`` reference, and no
+        # endpoint may be a ``trigger:.../sensor:...`` node whose id embeds
+        # an unreadable DAG.
+        for edge in result["edges"]:
+            for endpoint in (edge["source_id"], edge["target_id"]):
+                for unreadable in unreadable_dag_ids:
+                    assert unreadable not in endpoint, (
+                        f"edge endpoint {endpoint!r} leaks unreadable DAG 
{unreadable!r}"
+                    )
+
+        # The readable top-level DAG itself must still be present, along with
+        # its legitimate asset-scheduled-by edge (asset ids are not DAG ids
+        # and are unaffected by the readable-DAG filter).
+        dag_node_ids = {node["id"] for node in result["nodes"] if node["type"] 
== "dag"}
+        assert dag_node_ids == {"dag:downstream"}
+
     @pytest.mark.parametrize(
         ("readable_dags", "expected_present", "expected_absent"),
         [

Reply via email to