This is an automated email from the ASF dual-hosted git repository. akm pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tooling-agents.git
commit 98608bffa816417257456ab6c7ff6c2bb84ddaf1 Author: Andrew Musselman <[email protected]> AuthorDate: Thu Apr 2 17:05:08 2026 -0700 Pushing github review code up --- .../{code.py => agents/publishing.py} | 13 +- repos/apache/github-review/agents/security.py | 771 +++++++++++++++++++++ repos/apache/github-review/agents/summary.py | 406 +++++++++++ repos/apache/github-review/monitor-agent.sh | 176 ----- repos/apache/github-review/report.md | 203 ------ 5 files changed, 1187 insertions(+), 382 deletions(-) diff --git a/repos/apache/github-review/code.py b/repos/apache/github-review/agents/publishing.py similarity index 98% rename from repos/apache/github-review/code.py rename to repos/apache/github-review/agents/publishing.py index 8d8ff40..662f9ed 100644 --- a/repos/apache/github-review/code.py +++ b/repos/apache/github-review/agents/publishing.py @@ -572,7 +572,10 @@ async def run(input_dict, tools): entry = {"repo": repo, **w} entry["ecosystems"] = ecosystems_raw by_category[cat].append(entry) - publishing_repos.add(repo) + + # Only count as "publishing" for supply-chain purposes if release or snapshot + if cat in ("release_artifact", "snapshot_artifact"): + publishing_repos.add(repo) for eco in ecosystems_raw: if eco and eco != "github_actions_artifacts": @@ -595,7 +598,7 @@ async def run(input_dict, tools): lines.append(f"| Repositories scanned | {stats['repos_scanned']} |") lines.append(f"| Repositories with workflows | {stats['repos_with_workflows']} |") lines.append(f"| Total workflow files | {stats['total_workflows']} |") - lines.append(f"| **Repos with any publishing** | **{len(publishing_repos)}** |") + lines.append(f"| **Repos publishing to registries** | **{len(publishing_repos)}** |") lines.append(f"| Release artifact workflows | {len(release_wfs)} |") lines.append(f"| Snapshot / nightly workflows | {len(snapshot_wfs)} |") lines.append(f"| CI infrastructure image workflows | {len(ci_wfs)} |") @@ -813,7 +816,11 @@ async def run(input_dict, tools): 
from agent_factory.remote_mcp_client import RemoteMCPClient
from services.llm_service import call_llm
import httpx
# Fix: `asyncio` (rate-limit backoff / retry sleeps) and `re` (expression
# extraction, anchor slugs) are used throughout this module but were never
# imported — every scan would die with NameError on the first retry/report.
import asyncio
import re


async def run(input_dict, tools):
    """Scan cached GitHub Actions workflow YAML for CI security issues.

    Reads the workflow files cached by the Publishing Analyzer agent from the
    `ci-workflows:<owner>` namespace, runs a battery of static checks
    (pull_request_target + checkout, script injection, unpinned actions,
    broad permissions, cache poisoning, CODEOWNERS gaps, dependency-update
    config, composite actions), caches per-repo findings in
    `ci-security:<owner>`, and returns a markdown report.

    Args:
        input_dict: expects `owner` (default "apache"), `github_pat`
            (required), and optional `clear_cache` truthy string.
        tools: mapping of MCP remote URLs (clients are built but unused here).

    Returns:
        {"outputText": <markdown report or error message>}

    NOTE(review): `data_store` is not imported in this module — presumably
    injected into module globals by the agent framework (summary.py relies on
    it the same way); confirm before running standalone.
    """
    # Kept for parity with the other agents; not referenced below.
    mcpc = {url: RemoteMCPClient(remote_url=url) for url in tools.keys()}
    http_client = httpx.AsyncClient()
    try:
        owner = input_dict.get("owner", "apache")
        github_pat = input_dict.get("github_pat", "").strip()
        clear_cache_raw = input_dict.get("clear_cache", "false")
        clear_cache = str(clear_cache_raw).lower().strip() in ("true", "1", "yes")

        if not github_pat:
            return {"outputText": "Error: `github_pat` is required."}

        GITHUB_API = "https://api.github.com"
        gh_headers = {"Accept": "application/vnd.github.v3+json", "Authorization": f"token {github_pat}"}

        workflow_ns = data_store.use_namespace(f"ci-workflows:{owner}")
        security_ns = data_store.use_namespace(f"ci-security:{owner}")

        if clear_cache:
            print("Clearing security cache...", flush=True)
            for key in security_ns.list_keys():
                security_ns.delete(key)
            print("Cache cleared.", flush=True)

        all_wf_keys = workflow_ns.list_keys()
        if not all_wf_keys:
            return {"outputText": "Error: no cached workflows found in `ci-workflows:" + owner + "`. "
                    "Run the Publishing Analyzer agent first."}

        # Cache keys are "<repo>/<workflow path>"; group workflow names by repo.
        repos = {}
        for key in all_wf_keys:
            if "/" in key:
                repo, wf_name = key.split("/", 1)
                repos.setdefault(repo, []).append(wf_name)

        print(f"Found {len(all_wf_keys)} cached workflows across {len(repos)} repos\n", flush=True)

        async def github_get(url, max_retries=3):
            """GET with rate-limit backoff; returns the response or None."""
            for attempt in range(max_retries):
                try:
                    resp = await http_client.get(url, headers=gh_headers, timeout=15.0)
                    # 429, or 403 with an exhausted quota, means we are rate-limited.
                    if resp.status_code == 429 or (resp.status_code == 403 and
                            resp.headers.get("X-RateLimit-Remaining", "1") == "0"):
                        await asyncio.sleep(30)
                        continue
                    return resp
                except Exception:
                    if attempt < max_retries - 1:
                        await asyncio.sleep(2)
            return None

        # Action publishers considered low supply-chain risk.
        TRUSTED_ORGS = {
            "actions", "github", "docker", "google-github-actions", "aws-actions",
            "azure", "hashicorp", "gradle", "ruby", "codecov", "peaceiris",
            "pypa", "peter-evans", "softprops", "JamesIves", "crazy-max",
            "dorny", "EnricoMi", "pnpm", "apache",
        }

        # Triggers an external contributor can fire.
        PR_TRIGGERS = {"pull_request", "pull_request_target", "issue_comment"}

        # --- Pattern matching helpers --- 

        def is_sha_pinned(ref):
            # A pinned ref is a full 40-char hex commit SHA.
            if not ref:
                return False
            return len(ref) == 40 and all(c in "0123456789abcdef" for c in ref.lower())

        def extract_action_refs(content):
            """Collect every `uses:` target in the YAML (string scan, not a parser)."""
            refs = []
            for line in content.split("\n"):
                stripped = line.strip()
                if "uses:" in stripped:
                    idx = stripped.index("uses:")
                    action_ref = stripped[idx + 5:].strip().strip("'\"")
                    # Drop trailing comments (e.g. "@sha # v4").
                    if "#" in action_ref:
                        action_ref = action_ref[:action_ref.index("#")].strip()
                    # Skip expression-templated refs like ${{ matrix.action }}.
                    if action_ref and not action_ref.startswith("$"):
                        refs.append(action_ref)
            return refs

        def parse_action_ref(ref):
            """Split an action ref into org/name/version; classify local vs remote."""
            if ref.startswith("./"):
                return {"type": "local", "path": ref, "raw": ref}
            if "@" in ref:
                action_path, version = ref.rsplit("@", 1)
                parts = action_path.split("/")
                org = parts[0] if parts else ""
                name = "/".join(parts[:2]) if len(parts) >= 2 else action_path
                return {"type": "remote", "org": org, "name": name, "full": action_path,
                        "version": version, "pinned": is_sha_pinned(version), "raw": ref}
            return {"type": "unknown", "raw": ref}

        def extract_triggers(content):
            """Best-effort parse of the `on:` block (inline list, scalar, or mapping)."""
            triggers = set()
            in_on = False
            for line in content.split("\n"):
                stripped = line.strip()
                if stripped.startswith("on:"):
                    in_on = True
                    rest = stripped[3:].strip()
                    if rest.startswith("["):
                        # Inline form: on: [push, pull_request]
                        for t in rest.strip("[]").split(","):
                            triggers.add(t.strip())
                        in_on = False
                    elif rest and not rest.startswith("#"):
                        # Scalar form: on: push
                        triggers.add(rest.rstrip(":"))
                    continue
                if in_on:
                    if stripped and not stripped.startswith("#"):
                        # A non-indented line ends the `on:` mapping.
                        if not line.startswith(" ") and not line.startswith("\t"):
                            in_on = False
                            continue
                        if ":" in stripped:
                            trigger_name = stripped.split(":")[0].strip()
                            if trigger_name and not trigger_name.startswith("-"):
                                triggers.add(trigger_name)
            return triggers

        def extract_permissions(content):
            """Parse the first `permissions:` block into {scope: level}.

            A scalar form (e.g. `permissions: write-all`) is returned as
            {"_level": <value>}.
            """
            perms = {}
            in_perms = False
            indent = 0
            for line in content.split("\n"):
                stripped = line.strip()
                if stripped.startswith("permissions:"):
                    rest = stripped[12:].strip()
                    if rest and rest != "{}" and not rest.startswith("#"):
                        perms["_level"] = rest
                        return perms
                    in_perms = True
                    indent = len(line) - len(line.lstrip())
                    continue
                if in_perms:
                    if not stripped or stripped.startswith("#"):
                        continue
                    cur_indent = len(line) - len(line.lstrip())
                    # Dedent (or same level) ends the mapping.
                    if cur_indent <= indent and stripped:
                        break
                    if ":" in stripped:
                        key, val = stripped.split(":", 1)
                        perms[key.strip()] = val.strip()
            return perms

        def find_injection_in_run_blocks(content, context_label=""):
            """Find ${{ }} interpolation in run: blocks. Returns list of (severity, detail)."""
            findings = []
            in_run = False
            run_indent = 0
            current_step = ""

            for line in content.split("\n"):
                stripped = line.strip()

                if stripped.startswith("- name:"):
                    current_step = stripped[7:].strip().strip("'\"")

                if stripped.startswith("run:"):
                    in_run = True
                    run_indent = len(line) - len(line.lstrip())
                    run_content = stripped[4:].strip()
                    # Block scalars (| or >) continue on the following lines.
                    if run_content.startswith("|") or run_content.startswith(">"):
                        continue
                    if "${{" in run_content:
                        findings.extend(_classify_interpolation(run_content, current_step, context_label))
                    in_run = False
                    continue

                if in_run:
                    cur_indent = len(line) - len(line.lstrip())
                    if stripped and cur_indent <= run_indent:
                        in_run = False
                    elif "${{" in line:
                        findings.extend(_classify_interpolation(line, current_step, context_label))

            return findings

        def _classify_interpolation(line, step_name, context_label=""):
            """Grade each ${{ expr }} on a line: untrusted input > secrets > inputs."""
            findings = []
            prefix = f" in {context_label}" if context_label else ""
            step_info = f" at step '{step_name}'" if step_name else ""

            exprs = re.findall(r'\$\{\{([^}]+)\}\}', line)

            for expr in exprs:
                expr = expr.strip()
                expr_lower = expr.lower()

                # Fields an external contributor fully controls.
                untrusted_patterns = [
                    "event.pull_request.title", "event.pull_request.body",
                    "event.pull_request.head.ref", "event.pull_request.head.label",
                    "event.issue.title", "event.issue.body",
                    "event.comment.body", "event.review.body",
                    "event.discussion.title", "event.discussion.body",
                ]
                if any(p in expr_lower for p in untrusted_patterns):
                    findings.append(("CRITICAL",
                        f"Direct interpolation of untrusted input `${{{{ {expr} }}}}` in run block"
                        f"{step_info}{prefix}. Exploitable by external contributors."))
                    continue

                if "secrets." in expr_lower:
                    findings.append(("LOW",
                        f"Secret `${{{{ {expr} }}}}` directly interpolated in run block"
                        f"{step_info}{prefix}. Trusted value but risks log leakage."))
                    continue

                if "event.inputs." in expr_lower or "inputs." in expr_lower:
                    findings.append(("LOW",
                        f"Workflow input `${{{{ {expr} }}}}` directly interpolated in run block"
                        f"{step_info}{prefix}. Trusted committer input but should use env: block."))
                    continue

                # Values GitHub itself controls — not injectable; ignore.
                github_controlled = [
                    "github.actor", "github.sha", "github.ref", "github.repository",
                    "github.run_id", "github.run_number", "github.workspace",
                    "github.ref_name", "github.head_ref", "github.base_ref",
                    "runner.", "matrix.", "steps.", "needs.", "env.",
                ]
                if any(p in expr_lower for p in github_controlled):
                    continue

            return findings

        def check_prt_checkout(content):
            """Flag pull_request_target workflows that check out PR head code."""
            triggers = extract_triggers(content)
            if "pull_request_target" not in triggers:
                return None
            has_checkout = False
            checks_head = False
            for line in content.split("\n"):
                stripped = line.strip()
                if "actions/checkout" in stripped:
                    has_checkout = True
                if has_checkout and ("pull_request.head.sha" in stripped or
                        "pull_request.head.ref" in stripped or
                        "github.event.pull_request.head" in stripped):
                    checks_head = True
                    break
            if has_checkout and checks_head:
                return ("CRITICAL", "pull_request_target trigger with checkout of PR head code. "
                        "Untrusted PR code executes with base repo secrets and write permissions.")
            elif has_checkout:
                return ("LOW", "pull_request_target trigger with checkout action present. "
                        "Verify the checkout uses the base ref, not PR head.")
            return None

        def check_self_hosted(content, triggers):
            """Self-hosted runners reachable from PR triggers are a code-exec risk."""
            has_self_hosted = "self-hosted" in content
            has_pr_trigger = bool(triggers & PR_TRIGGERS)
            if has_self_hosted and has_pr_trigger:
                return ("HIGH", "Self-hosted runner with PR trigger. External contributors can "
                        "execute arbitrary code on self-hosted infrastructure.")
            elif has_self_hosted:
                return ("INFO", "Uses self-hosted runners. Ensure runners are ephemeral.")
            return None

        def check_permissions(content):
            """Flag write-all token scope and unusually broad write grants."""
            perms = extract_permissions(content)
            findings = []
            level = perms.get("_level", "")
            if level in ("write-all", "read-all|write-all"):
                findings.append(("HIGH", "Workflow uses `permissions: write-all`. "
                        "Follow least-privilege principle."))
            write_perms = [k for k, v in perms.items() if v == "write" and k != "_level"]
            if len(write_perms) > 3:
                findings.append(("LOW", f"Requests write access to {len(write_perms)} scopes: "
                        f"{', '.join(write_perms)}."))
            return findings

        def check_cache_poisoning(content, triggers):
            """actions/cache keyed on PR-controlled values can be poisoned."""
            has_pr = bool(triggers & {"pull_request", "pull_request_target"})
            has_cache = "actions/cache" in content
            if has_cache and has_pr:
                for line in content.split("\n"):
                    if "key:" in line and ("pull_request" in line or "head_ref" in line):
                        return ("HIGH", "Cache key derived from PR-controlled value. "
                                "A malicious PR could poison the cache.")
                return ("INFO", "Uses actions/cache with PR trigger. Verify cache keys "
                        "are not PR-controlled.")
            return None

        def deduplicate_findings(findings):
            """Collapse repeated same-check same-file findings into summaries."""
            deduped = []
            # Group by (check, file, severity)
            groups = {}
            for f in findings:
                key = (f["check"], f["file"], f["severity"])
                groups.setdefault(key, []).append(f)

            for (check, file, severity), items in groups.items():
                if len(items) == 1:
                    deduped.append(items[0])
                elif check in ("run_block_injection", "composite_action_injection"):
                    # Summarize: extract unique expressions from the detail text.
                    exprs = set()
                    for item in items:
                        found = re.findall(r'`\$\{\{ ([^}]+) \}\}`', item["detail"])
                        exprs.update(found)
                    expr_list = sorted(exprs)[:5]
                    expr_str = ", ".join(f"`{e}`" for e in expr_list)
                    more = f" +{len(exprs) - 5} more" if len(exprs) > 5 else ""
                    deduped.append({
                        "check": check,
                        "file": file,
                        "severity": severity,
                        "detail": (f"{len(items)} instances of direct interpolation in run blocks. "
                                   f"Expressions: {expr_str}{more}."),
                        "count": len(items),
                    })
                elif check == "composite_action_unpinned":
                    # Summarize unpinned refs inside one composite action.
                    refs = [item["detail"].split("`")[1] if "`" in item["detail"] else "?"
                            for item in items]
                    unique_refs = sorted(set(refs))
                    deduped.append({
                        "check": check,
                        "file": file,
                        "severity": severity,
                        "detail": (f"{len(items)} unpinned action refs in composite action: "
                                   f"{', '.join(f'`{r}`' for r in unique_refs[:5])}" +
                                   (f" +{len(unique_refs)-5} more" if len(unique_refs) > 5 else "")),
                        "count": len(items),
                    })
                else:
                    # For other checks, keep first and note count.
                    entry = dict(items[0])
                    if len(items) > 1:
                        entry["detail"] = f"({len(items)}x) {entry['detail']}"
                        entry["count"] = len(items)
                    deduped.append(entry)

            return deduped

        # ===== Main scan loop =====
        all_findings = {}
        repos_scanned = 0

        for repo_name, wf_names in sorted(repos.items()):
            repos_scanned += 1

            if repos_scanned % 10 == 1:
                print(f"[{repos_scanned}/{len(repos)}] Scanning {repo_name}...", flush=True)

            # Per-repo result cache; skipped entirely when clearing.
            cached = security_ns.get(f"findings:{repo_name}")
            if cached is not None and not clear_cache:
                if cached:
                    all_findings[repo_name] = cached
                continue

            repo_findings = []
            all_action_refs = []
            repo_triggers = set()

            # --- Analyze each cached workflow ---
            for wf_name in wf_names:
                # Skip composite action files — analyzed separately in Check 9
                if ".github/actions/" in wf_name:
                    continue

                content = workflow_ns.get(f"{repo_name}/{wf_name}")
                if not content or not isinstance(content, str):
                    continue

                triggers = extract_triggers(content)
                repo_triggers.update(triggers)
                action_refs = extract_action_refs(content)
                all_action_refs.extend([(wf_name, ref) for ref in action_refs])

                # Check 1: pull_request_target + checkout
                prt = check_prt_checkout(content)
                if prt:
                    repo_findings.append({"check": "prt_checkout", "severity": prt[0],
                                          "file": wf_name, "detail": prt[1]})

                # Check 2: Self-hosted runners
                sh = check_self_hosted(content, triggers)
                if sh:
                    repo_findings.append({"check": "self_hosted_runner", "severity": sh[0],
                                          "file": wf_name, "detail": sh[1]})

                # Check 3: Permissions
                for sev, detail in check_permissions(content):
                    repo_findings.append({"check": "broad_permissions", "severity": sev,
                                          "file": wf_name, "detail": detail})

                # Check 4: Cache poisoning
                cp = check_cache_poisoning(content, triggers)
                if cp:
                    repo_findings.append({"check": "cache_poisoning", "severity": cp[0],
                                          "file": wf_name, "detail": cp[1]})

                # Check 5: Injection in workflow run blocks
                injections = find_injection_in_run_blocks(content, context_label=f"workflow {wf_name}")
                for sev, detail in injections:
                    repo_findings.append({"check": "run_block_injection", "severity": sev,
                                          "file": wf_name, "detail": detail})

            # Check 6: Unpinned actions (repo-wide summary)
            # (loop variable renamed from `wf_name` to avoid shadowing the
            # workflow loop variable above)
            unpinned = []
            third_party = []
            for ref_file, ref in all_action_refs:
                parsed = parse_action_ref(ref)
                if parsed["type"] == "local":
                    continue
                if parsed["type"] == "remote":
                    if not parsed["pinned"]:
                        unpinned.append({"file": ref_file, "action": parsed["raw"],
                                         "org": parsed["org"], "name": parsed["name"]})
                    if parsed["org"] not in TRUSTED_ORGS:
                        third_party.append({"file": ref_file, "action": parsed["raw"],
                                            "org": parsed["org"], "name": parsed["name"]})

            if unpinned:
                by_action = {}
                for u in unpinned:
                    by_action.setdefault(u["name"], []).append(u["file"])
                top = sorted(by_action.items(), key=lambda x: -len(x[1]))[:5]
                detail_parts = [f"`{name}` ({len(files)})" for name, files in top]
                repo_findings.append({
                    "check": "unpinned_actions", "severity": "MEDIUM",
                    "file": "(repo-wide)",
                    "detail": (f"{len(unpinned)} unpinned action refs (mutable tags). "
                               f"Top: {', '.join(detail_parts)}."),
                    "count": len(unpinned), "total_refs": len(all_action_refs),
                })

            if third_party:
                unique = sorted(set(t["name"] for t in third_party))
                repo_findings.append({
                    "check": "third_party_actions", "severity": "INFO",
                    "file": "(repo-wide)",
                    "detail": (f"{len(unique)} third-party actions: "
                               f"{', '.join(unique[:10])}" +
                               (f" +{len(unique)-10} more" if len(unique) > 10 else "")),
                    "count": len(unique),
                })

            # --- Fetch extra files from GitHub ---

            # Check 7: CODEOWNERS
            resp = await github_get(f"{GITHUB_API}/repos/{owner}/{repo_name}/contents/.github/CODEOWNERS")
            if resp and resp.status_code == 200:
                try:
                    co_url = resp.json().get("download_url")
                    if co_url:
                        co_resp = await http_client.get(co_url, follow_redirects=True, timeout=10.0)
                        if co_resp.status_code == 200:
                            co_content = co_resp.text
                            has_github_rule = any(".github" in line and not line.strip().startswith("#")
                                                  for line in co_content.split("\n"))
                            if not has_github_rule:
                                repo_findings.append({
                                    "check": "codeowners_gap", "severity": "LOW",
                                    "file": "CODEOWNERS",
                                    "detail": "CODEOWNERS exists but has no rule covering `.github/`. "
                                              "Workflow changes can bypass security-focused review.",
                                })
                except Exception:
                    pass
            elif resp and resp.status_code == 404:
                repo_findings.append({
                    "check": "missing_codeowners", "severity": "LOW",
                    "file": "(missing)",
                    "detail": "No CODEOWNERS file. Workflow changes have no mandatory review.",
                })

            # Check 8: Dependabot / Renovate
            has_deps = False
            for path in [".github/dependabot.yml", ".github/dependabot.yaml",
                         "renovate.json", ".github/renovate.json", ".renovaterc.json"]:
                resp = await github_get(f"{GITHUB_API}/repos/{owner}/{repo_name}/contents/{path}")
                if resp and resp.status_code == 200:
                    has_deps = True
                    break

            if not has_deps:
                repo_findings.append({
                    "check": "missing_dependency_updates", "severity": "INFO",
                    "file": "(missing)",
                    "detail": "No dependabot.yml or renovate.json found.",
                })

            # Check 9: Composite actions via recursive Git Trees API
            # One API call gets the entire tree, handles any nesting depth
            composite_findings = []
            composite_analyzed = 0
            composite_total = 0

            resp = await github_get(
                f"{GITHUB_API}/repos/{owner}/{repo_name}/git/trees/HEAD?recursive=1")
            if resp and resp.status_code == 200:
                try:
                    tree = resp.json().get("tree", [])
                    action_files = [
                        item["path"] for item in tree
                        if item.get("path", "").startswith(".github/actions/")
                        and item.get("path", "").endswith(("/action.yml", "/action.yaml"))
                        and item.get("type") == "blob"
                    ]
                    composite_total = len(action_files)

                    for action_path in action_files:
                        # Extract action name: .github/actions/build/rust/action.yml -> build/rust
                        action_name = action_path.replace(".github/actions/", "").rsplit("/", 1)[0]

                        # Fetch the action.yml content
                        aresp = await github_get(
                            f"{GITHUB_API}/repos/{owner}/{repo_name}/contents/{action_path}")
                        if not aresp or aresp.status_code != 200:
                            continue

                        try:
                            dl_url = aresp.json().get("download_url")
                            if not dl_url:
                                continue
                            dl_resp = await http_client.get(dl_url, follow_redirects=True, timeout=10.0)
                            if dl_resp.status_code != 200:
                                continue
                            action_content = dl_resp.text
                        except Exception:
                            continue

                        composite_analyzed += 1
                        short_path = f".github/actions/{action_name}/action.yml"

                        # Store for other agents
                        workflow_ns.set(f"{repo_name}/{short_path}", action_content)

                        # Run injection checks
                        context = f"composite action .github/actions/{action_name}"
                        injections = find_injection_in_run_blocks(action_content, context_label=context)
                        for sev, detail in injections:
                            composite_findings.append({
                                "check": "composite_action_injection",
                                "severity": sev,
                                "file": short_path,
                                "detail": detail,
                            })

                        # Check unpinned actions inside composite
                        ca_refs = extract_action_refs(action_content)
                        for ref in ca_refs:
                            parsed = parse_action_ref(ref)
                            if parsed["type"] == "remote" and not parsed["pinned"]:
                                composite_findings.append({
                                    "check": "composite_action_unpinned",
                                    "severity": "MEDIUM",
                                    "file": short_path,
                                    "detail": (f"Composite action uses unpinned action `{parsed['raw']}`. "
                                               "Supply chain risk."),
                                })

                        # Check inputs.* directly in run blocks (hidden injection)
                        has_input_injection = False
                        in_run = False
                        run_indent = 0
                        for cline in action_content.split("\n"):
                            cs = cline.strip()
                            if cs.startswith("run:"):
                                in_run = True
                                run_indent = len(cline) - len(cline.lstrip())
                                rest = cs[4:].strip()
                                if rest.startswith("|") or rest.startswith(">"):
                                    continue
                                if "inputs." in rest and "${{" in rest:
                                    has_input_injection = True
                                    break
                            elif in_run:
                                ci = len(cline) - len(cline.lstrip())
                                if cs and ci <= run_indent:
                                    in_run = False
                                elif "inputs." in cline and "${{" in cline:
                                    has_input_injection = True
                                    break

                        if has_input_injection:
                            composite_findings.append({
                                "check": "composite_action_input_injection",
                                "severity": "HIGH",
                                "file": short_path,
                                "detail": (f"Composite action `{action_name}` directly interpolates "
                                           "`inputs.*` in run block. Callers may pass untrusted values — "
                                           "the injection is hidden from workflow-level analysis."),
                            })

                except Exception as e:
                    print(f"  Error scanning composite actions for {repo_name}: {str(e)[:100]}", flush=True)

            # Deduplicate composite findings per file before adding
            composite_findings = deduplicate_findings(composite_findings)
            repo_findings.extend(composite_findings)

            if composite_total > 0:
                repo_findings.append({
                    "check": "composite_actions_scanned", "severity": "INFO",
                    "file": ".github/actions/",
                    "detail": (f"{composite_analyzed}/{composite_total} composite actions analyzed. "
                               f"{len(composite_findings)} finding(s)."),
                })

            # Deduplicate all findings for this repo
            repo_findings = deduplicate_findings(repo_findings)

            # Store
            security_ns.set(f"findings:{repo_name}", repo_findings)
            if repo_findings:
                all_findings[repo_name] = repo_findings

            # Gentle pacing between repos to stay under API limits.
            await asyncio.sleep(0.1)

        print(f"\n{'=' * 60}", flush=True)
        print(f"Security scan complete! {repos_scanned} repos", flush=True)
        total_findings = sum(len(f) for f in all_findings.values())
        print(f"Total findings: {total_findings} across {len(all_findings)} repos", flush=True)
        print(f"{'=' * 60}\n", flush=True)

        # ===== Build report =====
        report_title = f"CI Security Scan: {owner}"

        severity_counts = {}
        check_counts = {}
        for repo, findings in all_findings.items():
            for f in findings:
                sev = f.get("severity", "INFO")
                severity_counts[sev] = severity_counts.get(sev, 0) + 1
                chk = f.get("check", "unknown")
                check_counts[chk] = check_counts.get(chk, 0) + 1

        lines = []
        lines.append(f"Analyzed **{repos_scanned}** repositories using cached workflow YAML "
                     f"from the Publishing Analyzer.\n")

        lines.append("## Executive Summary\n")
        lines.append("| Severity | Count |")
        lines.append("|----------|-------|")
        for sev in ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"]:
            count = severity_counts.get(sev, 0)
            if count > 0:
                lines.append(f"| **{sev}** | **{count}** |")
        lines.append("")

        check_descriptions = {
            "prt_checkout": "pull_request_target + checkout of untrusted PR code",
            "self_hosted_runner": "Self-hosted runners exposed to PR triggers",
            "broad_permissions": "Overly broad GITHUB_TOKEN permissions",
            "cache_poisoning": "Potential cache poisoning via PR-controlled keys",
            "run_block_injection": "Direct ${{ }} interpolation in workflow run blocks",
            "unpinned_actions": "Mutable tag refs (not SHA-pinned)",
            "third_party_actions": "Actions from unverified third-party sources",
            "codeowners_gap": "CODEOWNERS missing .github/ coverage",
            "missing_codeowners": "No CODEOWNERS file",
            "missing_dependency_updates": "No dependabot/renovate configuration",
            "composite_actions_scanned": "Composite actions analyzed",
            "composite_action_injection": "Injection in composite action run block",
            "composite_action_unpinned": "Unpinned action ref inside composite action",
            "composite_action_input_injection": "Composite action passes inputs.* directly to run block",
        }

        lines.append("## Findings by Check Type\n")
        lines.append("| Check | Count | Description |")
        lines.append("|-------|-------|-------------|")
        for chk, count in sorted(check_counts.items(), key=lambda x: -x[1]):
            desc = check_descriptions.get(chk, chk)
            lines.append(f"| {chk} | {count} | {desc} |")
        lines.append("")

        by_severity = {"CRITICAL": [], "HIGH": [], "MEDIUM": [], "LOW": [], "INFO": []}
        for repo, findings in all_findings.items():
            for f in findings:
                sev = f.get("severity", "INFO")
                if sev in by_severity:
                    by_severity[sev].append((repo, f))

        if by_severity["CRITICAL"]:
            lines.append("## CRITICAL Findings\n")
            lines.append("Untrusted external input directly interpolated in shell execution contexts.\n")
            for repo, f in sorted(by_severity["CRITICAL"], key=lambda x: (x[0], x[1].get("file", ""))):
                lines.append(f"- **{owner}/{repo}** (`{f['file']}`): [{f['check']}] {f['detail']}")
            lines.append("")

        if by_severity["HIGH"]:
            lines.append("## HIGH Findings\n")
            for repo, f in sorted(by_severity["HIGH"], key=lambda x: (x[0], x[1].get("file", ""))):
                lines.append(f"- **{owner}/{repo}** (`{f['file']}`): [{f['check']}] {f['detail']}")
            lines.append("")

        if by_severity["MEDIUM"]:
            lines.append("## MEDIUM Findings\n")
            lines.append(f"<details>\n<summary>Show {len(by_severity['MEDIUM'])} medium findings</summary>\n")
            for repo, f in sorted(by_severity["MEDIUM"], key=lambda x: (x[0], x[1].get("file", ""))):
                lines.append(f"- **{owner}/{repo}** (`{f['file']}`): [{f['check']}] {f['detail']}")
            lines.append(f"\n</details>\n")

        if by_severity["LOW"]:
            lines.append("## LOW Findings\n")
            lines.append(f"<details>\n<summary>Show {len(by_severity['LOW'])} low findings</summary>\n")
            for repo, f in sorted(by_severity["LOW"], key=lambda x: (x[0], x[1].get("file", ""))):
                lines.append(f"- **{owner}/{repo}** (`{f['file']}`): [{f['check']}] {f['detail']}")
            lines.append(f"\n</details>\n")

        if by_severity["INFO"]:
            lines.append("## INFO Findings\n")
            lines.append(f"<details>\n<summary>Show {len(by_severity['INFO'])} info findings</summary>\n")
            for repo, f in sorted(by_severity["INFO"], key=lambda x: (x[0], x[1].get("file", ""))):
                lines.append(f"- **{owner}/{repo}** (`{f['file']}`): [{f['check']}] {f['detail']}")
            lines.append(f"\n</details>\n")

        lines.append("## Detailed Results by Repository\n")
        for repo in sorted(all_findings.keys()):
            findings = all_findings[repo]
            if not findings:
                continue
            sev_summary = {}
            for f in findings:
                sev_summary[f["severity"]] = sev_summary.get(f["severity"], 0) + 1
            sev_str = ", ".join(f"{s}: {c}" for s, c in sorted(sev_summary.items()))

            lines.append(f"### {owner}/{repo}\n")
            lines.append(f"**{len(findings)}** findings | {sev_str}\n")

            sev_order = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3, "INFO": 4}
            for f in sorted(findings, key=lambda x: sev_order.get(x["severity"], 99)):
                lines.append(f"- **[{f['severity']}]** `{f['file']}` — [{f['check']}] {f['detail']}")
            lines.append("")

        lines.append("---\n")
        lines.append(f"*Findings cached in `ci-security:{owner}`. "
                     f"Set `clear_cache` to `true` to re-scan.*")

        report_body = "\n".join(lines)

        def to_anchor(text):
            """GitHub-style markdown heading -> anchor slug."""
            anchor = text.lower().strip()
            anchor = re.sub(r'[^\w\s-]', '', anchor)
            anchor = re.sub(r'\s+', '-', anchor)
            anchor = re.sub(r'-+', '-', anchor)
            return anchor.strip('-')

        toc_lines = [f"# {report_title}\n", "## Contents\n"]
        toc_lines.append(f"- [Executive Summary](#{to_anchor('Executive Summary')})")
        toc_lines.append(f"- [Findings by Check Type](#{to_anchor('Findings by Check Type')})")
        for sev in ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"]:
            if by_severity.get(sev):
                toc_lines.append(f"- [{sev} Findings](#{to_anchor(f'{sev} Findings')}) ({len(by_severity[sev])})")
        toc_lines.append(f"- [Detailed Results](#{to_anchor('Detailed Results by Repository')})")
        for repo in sorted(all_findings.keys()):
            toc_lines.append(f"  - [{owner}/{repo}](#{to_anchor(f'{owner}/{repo}')})")

        toc = "\n".join(toc_lines)
        full_report = toc + "\n\n---\n\n" + report_body

        security_ns.set("latest_report", full_report)
        security_ns.set("latest_stats", {
            "repos_scanned": repos_scanned,
            "repos_with_findings": len(all_findings),
            "total_findings": total_findings,
            "severity_counts": severity_counts,
            "check_counts": check_counts,
        })

        return {"outputText": full_report}

    finally:
        await http_client.aclose()
url) for url in tools.keys() } + http_client = httpx.AsyncClient() + try: + owner = input_dict.get("owner", "apache") + print(f"Agent 3 starting for owner={owner}", flush=True) + + report_ns = data_store.use_namespace(f"ci-report:{owner}") + security_ns = data_store.use_namespace(f"ci-security:{owner}") + + pub_stats = report_ns.get("latest_stats") + sec_stats = security_ns.get("latest_stats") + pub_report = report_ns.get("latest_report") + sec_report = security_ns.get("latest_report") + + if not pub_stats or not sec_stats: + return {"outputText": "Error: Run Agent 1 and Agent 2 first."} + + print(f"Publishing report: {len(pub_report or '')} chars", flush=True) + print(f"Security report: {len(sec_report or '')} chars", flush=True) + + # --- Parse per-repo ecosystems from publishing report text --- + # Matches lines like: ### apache/iggy\n**4** ... | Ecosystems: **crates_io, docker_hub** | ... + repo_ecosystems = {} + repo_categories = {} # repo -> {release: N, snapshot: N} + if pub_report: + # Match detailed results headers + header_pattern = re.compile( + r'### ' + re.escape(f'{owner}/') + r'(\S+)\s*\n+' + r'\*\*(\d+)\*\* release/snapshot workflows \| Ecosystems: \*\*([^*]+)\*\*' + r' \|(.+)') + for m in header_pattern.finditer(pub_report): + repo = m.group(1) + ecosystems = [e.strip() for e in m.group(3).split(",")] + repo_ecosystems[repo] = ecosystems + cats_str = m.group(4) + cats = {} + for cat_m in re.finditer(r'(Release Artifacts|Snapshot[^:]*): (\d+)', cats_str): + if "Release" in cat_m.group(1): + cats["release"] = int(cat_m.group(2)) + else: + cats["snapshot"] = int(cat_m.group(2)) + repo_categories[repo] = cats + + print(f"Parsed ecosystems for {len(repo_ecosystems)} repos", flush=True) + + # --- Read per-repo security findings --- + all_sec_keys = security_ns.list_keys() + finding_keys = [k for k in all_sec_keys if k.startswith("findings:")] + + repo_security = {} # repo -> {severities, total, worst, top_checks} + SEV_ORDER = {"CRITICAL": 0, "HIGH": 1, 
"MEDIUM": 2, "LOW": 3, "INFO": 4} + + for k in finding_keys: + repo = k.replace("findings:", "") + findings = security_ns.get(k) + if not findings or not isinstance(findings, list): + continue + + sev_counts = {} + check_counts = {} + for f in findings: + sev = f.get("severity", "INFO") + sev_counts[sev] = sev_counts.get(sev, 0) + 1 + chk = f.get("check", "unknown") + # Skip info-level noise for top checks + if sev != "INFO": + check_counts[chk] = check_counts.get(chk, 0) + 1 + + worst = "INFO" + for s in ["CRITICAL", "HIGH", "MEDIUM", "LOW"]: + if sev_counts.get(s, 0) > 0: + worst = s + break + + # Top 3 non-INFO checks by count + top_checks = sorted(check_counts.items(), key=lambda x: -x[1])[:3] + + repo_security[repo] = { + "severities": sev_counts, + "total": len(findings), + "worst": worst, + "top_checks": top_checks, + } + + print(f"Security data for {len(repo_security)} repos", flush=True) + + # --- Parse trusted publishing opportunities --- + # Which repos have TP opportunities (using long-lived tokens where OIDC is available) + tp_repos = set() + tp_opportunities = pub_stats.get("trusted_publishing_opportunities", []) + if isinstance(tp_opportunities, list): + for opp in tp_opportunities: + if isinstance(opp, dict): + tp_repos.add(opp.get("repo", "")) + elif isinstance(opp, str): + tp_repos.add(opp) + # Also try parsing from report text for reliability + if pub_report: + tp_section = False + for line in pub_report.split("\n"): + if "Trusted Publishing Migration" in line: + tp_section = True + continue + if tp_section and line.startswith("## ") and "Trusted" not in line: + break + if tp_section and "| " in line and "`" in line: + parts = line.split("|") + if len(parts) > 1: + repo_name = parts[1].strip() + if repo_name and repo_name != "Repository": + tp_repos.add(repo_name) + + print(f"Trusted publishing opportunity repos: {len(tp_repos)}", flush=True) + + # --- Identify repos already using OIDC --- + oidc_repos = set() + if pub_report: + for line in 
pub_report.split("\n"): + if "OIDC" in line and ("trusted publishing" in line.lower() or "id-token" in line.lower()): + # Find which repo section we're in + pass # Complex to parse; skip for now + + # --- Build combined risk table --- + publishing_repos = set(pub_stats.get("publishing_repos", [])) + all_repos = publishing_repos | set(repo_security.keys()) + + repo_rows = [] + for repo in sorted(all_repos): + ecosystems = repo_ecosystems.get(repo, []) + sec = repo_security.get(repo, {}) + worst = sec.get("worst", "—") + total = sec.get("total", 0) + sev_counts = sec.get("severities", {}) + top_checks = sec.get("top_checks", []) + cats = repo_categories.get(repo, {}) + publishes = repo in publishing_repos + has_tp_opportunity = repo in tp_repos + + # Risk score for sorting: publishing breadth * security severity + eco_score = len(ecosystems) if ecosystems else (1 if publishes else 0) + sev_score = {"CRITICAL": 100, "HIGH": 50, "MEDIUM": 10, "LOW": 3, "INFO": 1, "—": 0}.get(worst, 0) + risk_score = eco_score * sev_score + total + + repo_rows.append({ + "repo": repo, + "ecosystems": ecosystems, + "publishes": publishes, + "worst": worst, + "total": total, + "sev_counts": sev_counts, + "top_checks": top_checks, + "cats": cats, + "has_tp": has_tp_opportunity, + "risk_score": risk_score, + }) + + repo_rows.sort(key=lambda r: -r["risk_score"]) + + # --- Classify into tiers --- + critical_repos = [r for r in repo_rows if r["worst"] == "CRITICAL"] + high_repos = [r for r in repo_rows if r["worst"] == "HIGH"] + medium_repos = [r for r in repo_rows if r["worst"] == "MEDIUM" and r["publishes"]] + low_repos = [r for r in repo_rows if r["worst"] in ("LOW", "INFO", "—") and r["publishes"]] + + # --- Generate report --- + PUB = "apache-github-publishing.md" + SEC = "apache-github-security.md" + + def anchor(text): + a = text.lower().strip() + a = re.sub(r'[^\w\s-]', '', a) + a = re.sub(r'\s+', '-', a) + a = re.sub(r'-+', '-', a) + return a.strip('-') + + def repo_pub_link(repo): + 
return f"[publishing]({PUB}#{anchor(f'{owner}/{repo}')})" + + def repo_sec_link(repo): + return f"[security]({SEC}#{anchor(f'{owner}/{repo}')})" + + def eco_str(ecosystems): + if not ecosystems: + return "—" + return ", ".join(ecosystems) + + def sev_summary(sev_counts): + parts = [] + for s in ["CRITICAL", "HIGH", "MEDIUM", "LOW"]: + c = sev_counts.get(s, 0) + if c > 0: + parts.append(f"{c} {s}") + return ", ".join(parts) if parts else "INFO only" + + def check_summary(top_checks): + if not top_checks: + return "" + return ", ".join(f"{chk} ({n})" for chk, n in top_checks) + + lines = [] + lines.append(f"# Apache GitHub Review: Combined Risk Assessment\n") + lines.append(f"Cross-referencing CI publishing analysis with security scan results " + f"across **{len(all_repos)}** repositories.\n") + + lines.append("## Companion Reports\n") + lines.append(f"| Report | Description |") + lines.append(f"|--------|-------------|") + lines.append(f"| [{PUB}]({PUB}) | Which repos publish packages to registries, " + f"what ecosystems, auth methods, trusted publishing opportunities. " + f"{pub_stats.get('total_workflows', '?')} workflows across " + f"{pub_stats.get('repos_scanned', '?')} repos. |") + lines.append(f"| [{SEC}]({SEC}) | Pattern-matching security checks on cached workflow YAML: " + f"injection patterns, unpinned actions, permissions, composite action analysis. " + f"{sec_stats.get('total_findings', '?')} findings across " + f"{sec_stats.get('repos_with_findings', '?')} repos. 
|") + lines.append("") + + # --- At a glance --- + lines.append("## At a Glance\n") + lines.append(f"| Metric | Value |") + lines.append(f"|--------|-------|") + lines.append(f"| Repos scanned | {pub_stats.get('repos_scanned', '?')} |") + lines.append(f"| Repos publishing to registries | {len(publishing_repos)} |") + lines.append(f"| Total security findings | {sec_stats.get('total_findings', '?')} |") + sev = sec_stats.get("severity_counts", {}) + lines.append(f"| CRITICAL findings | {sev.get('CRITICAL', 0)} |") + lines.append(f"| HIGH findings | {sev.get('HIGH', 0)} |") + lines.append(f"| Repos needing trusted publishing migration | {len(tp_repos)} |") + eco = pub_stats.get("ecosystem_counts", {}) + # Filter out documentation/CI targets for the publishing risk summary + doc_targets = {"codecov", "github_pages", "surge_sh", "s3", "gcr"} + release_eco = {k: v for k, v in eco.items() if k not in doc_targets} + top_eco = sorted(release_eco.items(), key=lambda x: -x[1])[:5] + eco_summary = ", ".join(f"{e} ({c})" for e, c in top_eco) + lines.append(f"| Top ecosystems | {eco_summary} |") + lines.append("") + + # --- CRITICAL + HIGH tier --- + if critical_repos or high_repos: + lines.append("## Immediate Attention Required\n") + lines.append("Repos with CRITICAL or HIGH security findings that also publish packages.\n") + + for r in critical_repos + high_repos: + repo = r["repo"] + lines.append(f"### {owner}/{repo}\n") + + eco_display = eco_str(r["ecosystems"]) + cat_parts = [] + if r["cats"].get("release"): + cat_parts.append(f"{r['cats']['release']} release") + if r["cats"].get("snapshot"): + cat_parts.append(f"{r['cats']['snapshot']} snapshot") + cat_display = ", ".join(cat_parts) if cat_parts else "" + + details = [] + if r["publishes"] and r["ecosystems"]: + pub_line = f"**Publishes to:** {eco_display}" + if cat_display: + pub_line += f" ({cat_display})" + details.append(pub_line) + elif r["publishes"]: + details.append(f"**Publishes:** yes (see 
{repo_pub_link(repo)})") + + details.append(f"**Security:** {r['total']} findings — {sev_summary(r['sev_counts'])}") + + if r["top_checks"]: + details.append(f"**Top issues:** {check_summary(r['top_checks'])}") + + if r["has_tp"]: + details.append(f"**Trusted publishing:** migration opportunity — currently using long-lived tokens " + f"([details]({PUB}#trusted-publishing-migration-opportunities))") + + details.append(f"**Details:** {repo_pub_link(repo)} · {repo_sec_link(repo)}") + + # Join with double-space + newline for markdown line breaks + lines.append(" \n".join(details)) + lines.append("") + + # --- MEDIUM tier: publishing repos --- + if medium_repos: + lines.append("## Moderate Risk: Publishing Repos with MEDIUM Findings\n") + lines.append("These repos publish packages and have MEDIUM-severity findings (typically unpinned actions).\n") + lines.append(f"| Repo | Ecosystems | Findings | Top Issue | Trusted Pub | Details |") + lines.append(f"|------|-----------|----------|-----------|------------|---------|") + + for r in medium_repos: + repo = r["repo"] + eco = eco_str(r["ecosystems"]) if r["ecosystems"] else "npm" + top = r["top_checks"][0][0] if r["top_checks"] else "unpinned_actions" + tp = "migrate" if r["has_tp"] else "—" + links = f"{repo_pub_link(repo)} · {repo_sec_link(repo)}" + lines.append(f"| {owner}/{repo} | {eco} | {r['total']} | {top} | {tp} | {links} |") + + lines.append("") + + # --- LOW tier summary --- + if low_repos: + lines.append("## Low Risk: Publishing Repos\n") + lines.append(f"{len(low_repos)} repos publish packages with only LOW/INFO-level security findings " + f"(missing CODEOWNERS, no dependabot config).\n") + lines.append(f"<details>\n<summary>Show {len(low_repos)} repos</summary>\n") + for r in low_repos: + repo = r["repo"] + eco = eco_str(r["ecosystems"]) if r["ecosystems"] else "—" + lines.append(f"- **{owner}/{repo}** — {eco} — {r['total']} findings " + f"({repo_pub_link(repo)} · {repo_sec_link(repo)})") + 
lines.append(f"\n</details>\n") + + # --- Trusted Publishing summary --- + lines.append("## Trusted Publishing Opportunities\n") + lines.append(f"**{len(tp_repos)}** repos use long-lived tokens to publish to ecosystems that support " + f"OIDC trusted publishing. Migrating eliminates stored secrets.\n") + lines.append(f"Full details: [{PUB} → Trusted Publishing]" + f"({PUB}#trusted-publishing-migration-opportunities)\n") + + # Group by ecosystem from pub_report + tp_ecosystems = {} + if pub_report: + current_eco = None + for line in pub_report.split("\n"): + if line.startswith("### ") and "Trusted Publishing" not in line: + # Check if this is an ecosystem header inside TP section + eco_name = line[4:].strip() + if eco_name in ("crates.io", "npm", "NuGet", "PyPI", "RubyGems"): + current_eco = eco_name + continue + if current_eco and line.startswith("## "): + current_eco = None + continue + if current_eco and "| " in line and "`" in line: + parts = [p.strip() for p in line.split("|")] + if len(parts) > 2 and parts[1] and parts[1] != "Repository": + tp_ecosystems.setdefault(current_eco, []).append(parts[1]) + + for eco, repos in sorted(tp_ecosystems.items()): + unique = sorted(set(repos)) + lines.append(f"- **{eco}**: {', '.join(unique)}") + lines.append("") + + # --- Key recommendations --- + lines.append("## Key Recommendations\n") + + rec_num = 1 + + if critical_repos: + crit_names = ", ".join("`" + r["repo"] + "`" for r in critical_repos) + verb = "has" if len(critical_repos) == 1 else "have" + lines.append(f"{rec_num}. **Fix CRITICAL findings immediately.** " + f"{crit_names} {verb} " + f"exploitable vulnerabilities in publishing workflows.") + rec_num += 1 + + lines.append(f"{rec_num}. **Migrate to trusted publishing.** " + f"{len(tp_repos)} repos can eliminate long-lived secrets by adopting OIDC. 
" + f"Start with repos publishing to PyPI and npm — " + f"[migration guide]({PUB}#trusted-publishing-migration-opportunities).") + rec_num += 1 + + if high_repos: + # Count all repos that have any HIGH findings (not just worst=HIGH) + repos_with_high = [r for r in repo_rows + if r["sev_counts"].get("HIGH", 0) > 0] + lines.append(f"{rec_num}. **Review composite action injection patterns.** " + f"{len(repos_with_high)} repos have HIGH findings from `inputs.*` directly interpolated " + f"in composite action run blocks. While these are called from trusted contexts today, " + f"they create hidden injection surfaces.") + rec_num += 1 + + lines.append(f"{rec_num}. **Pin actions to SHA hashes.** " + f"All {sec_stats.get('repos_with_findings', '?')} repos use mutable tag refs. " + f"See the [unpinned actions findings]({SEC}#medium-findings) for per-repo counts.") + rec_num += 1 + + # Count repos with missing_codeowners or codeowners_gap findings + no_codeowners = 0 + for repo, sec in repo_security.items(): + for chk, cnt in sec.get("top_checks", []): + pass # top_checks doesn't have all checks + # Count from check_counts in sec_stats + codeowners_missing = sec_stats.get("check_counts", {}).get("missing_codeowners", 0) + codeowners_gap = sec_stats.get("check_counts", {}).get("codeowners_gap", 0) + lines.append(f"{rec_num}. **Add CODEOWNERS with `.github/` coverage.** " + f"{codeowners_missing} repos have no CODEOWNERS file and " + f"{codeowners_gap} have CODEOWNERS without `.github/` rules. 
" + f"Workflow changes can bypass security review.") + lines.append("") + + lines.append("---\n") + lines.append(f"*Generated from [{PUB}]({PUB}) and [{SEC}]({SEC}).*") + + full_report = "\n".join(lines) + print(f"Report length: {len(full_report)} chars", flush=True) + + combined_ns = data_store.use_namespace(f"ci-combined:{owner}") + combined_ns.set("latest_report", full_report) + + return {"outputText": full_report} + + finally: + await http_client.aclose() \ No newline at end of file diff --git a/repos/apache/github-review/monitor-agent.sh b/repos/apache/github-review/monitor-agent.sh deleted file mode 100755 index 4f08305..0000000 --- a/repos/apache/github-review/monitor-agent.sh +++ /dev/null @@ -1,176 +0,0 @@ -#!/bin/bash -# monitor-agent.sh — tmux dashboard for CI analyzer agents -# Monitors: Publishing Analyzer, Security Scanner, Report Combiner -# Usage: chmod +x monitor-agent.sh && ./monitor-agent.sh - -COUCH_URL="http://user:password@localhost:5984" -DB="agent_data_store" -SESSION="ci-monitor" -OWNER="apache" - -# Kill existing session if any -tmux kill-session -t "$SESSION" 2>/dev/null - -tmux new-session -d -s "$SESSION" -x 220 -y 60 - -# ── Pane 0 (top-left): All namespace doc counts grouped by agent ── -tmux send-keys "watch -n 5 'echo \"=== Doc Counts (all agents) ===\"; echo; \ -curl -s \"${COUCH_URL}/${DB}/_find\" \ - -H \"Content-Type: application/json\" \ - -d \"{\\\"selector\\\":{\\\"namespace\\\":{\\\"\\\$in\\\":[\\\"ci-classification:${OWNER}\\\",\\\"ci-workflows:${OWNER}\\\",\\\"ci-report:${OWNER}\\\",\\\"ci-security:${OWNER}\\\",\\\"ci-combined:${OWNER}\\\"]}},\\\"fields\\\":[\\\"namespace\\\"],\\\"limit\\\":9999}\" \ -| python3 -c \" -import sys, json -from collections import Counter -docs = json.load(sys.stdin)[\\\"docs\\\"] -counts = Counter(d[\\\"namespace\\\"] for d in docs) -print(f\\\"Total: {len(docs)} docs\\\") -print() -groups = { - \\\"Agent 1 (Publishing)\\\": [\\\"ci-classification:${OWNER}\\\", \\\"ci-workflows:${OWNER}\\\", 
\\\"ci-report:${OWNER}\\\"], - \\\"Agent 2 (Security)\\\": [\\\"ci-security:${OWNER}\\\"], - \\\"Agent 3 (Combined)\\\": [\\\"ci-combined:${OWNER}\\\"], -} -for label, namespaces in groups.items(): - group_total = sum(counts.get(ns, 0) for ns in namespaces) - print(f\\\"{label}: {group_total} docs\\\") - for ns in namespaces: - c = counts.get(ns, 0) - short = ns.split(\\\":\\\")[0] - print(f\\\" {c}\\t{short}\\\") - print() -if not docs: - print(\\\" (all empty)\\\") -\"'" C-m - -# ── Pane 1 (top-right): Completed repos + classification status ── -tmux split-window -h -tmux send-keys "watch -n 10 'echo \"=== Agent 1: Repo Status ===\"; echo; \ -curl -s \"${COUCH_URL}/${DB}/_find\" \ - -H \"Content-Type: application/json\" \ - -d \"{\\\"selector\\\":{\\\"namespace\\\":\\\"ci-classification:${OWNER}\\\",\\\"key\\\":{\\\"\\\$regex\\\":\\\"^__meta__:\\\"}},\\\"fields\\\":[\\\"key\\\",\\\"value\\\"],\\\"limit\\\":9999}\" \ -| python3 -c \" -import sys, json -docs = json.load(sys.stdin)[\\\"docs\\\"] -done = [d for d in docs if d.get(\\\"value\\\", {}).get(\\\"complete\\\")] -with_wf = [d for d in done if d.get(\\\"value\\\", {}).get(\\\"workflows\\\")] -without_wf = [d for d in done if not d.get(\\\"value\\\", {}).get(\\\"workflows\\\")] -print(f\\\"Completed: {len(done)} repos ({len(with_wf)} with workflows, {len(without_wf)} empty)\\\") -print() -for d in sorted(with_wf, key=lambda x: x[\\\"key\\\"]): - repo = d[\\\"key\\\"].replace(\\\"__meta__:\\\", \\\"\\\") - wfs = d[\\\"value\\\"].get(\\\"workflows\\\", []) - print(f\\\" ✓ {repo}: {len(wfs)} workflows\\\") -\"'" C-m - -# ── Pane 2 (middle-left): In-progress classification ── -tmux select-pane -t 0 -tmux split-window -v -tmux send-keys "watch -n 5 'echo \"=== Agent 1: In-Progress ===\"; echo; \ -curl -s \"${COUCH_URL}/${DB}/_find\" \ - -H \"Content-Type: application/json\" \ - -d 
\"{\\\"selector\\\":{\\\"namespace\\\":\\\"ci-classification:${OWNER}\\\",\\\"key\\\":{\\\"\\\$not\\\":{\\\"\\\$regex\\\":\\\"^__meta__:\\\"}}},\\\"fields\\\":[\\\"key\\\"],\\\"limit\\\":9999}\" \ -| python3 -c \" -import sys, json, subprocess -docs = json.load(sys.stdin)[\\\"docs\\\"] - -meta_raw = subprocess.run( - [\\\"curl\\\", \\\"-s\\\", \\\"${COUCH_URL}/${DB}/_find\\\", - \\\"-H\\\", \\\"Content-Type: application/json\\\", - \\\"-d\\\", json.dumps({\\\"selector\\\": {\\\"namespace\\\": \\\"ci-classification:${OWNER}\\\", \\\"key\\\": {\\\"\\\$regex\\\": \\\"^__meta__:\\\"}}, \\\"fields\\\": [\\\"key\\\", \\\"value\\\"], \\\"limit\\\": 9999})], - capture_output=True, text=True -).stdout -meta_docs = json.loads(meta_raw)[\\\"docs\\\"] -complete_repos = {d[\\\"key\\\"].replace(\\\"__meta__:\\\", \\\"\\\") for d in meta_docs if d.get(\\\"value\\\", {}).get(\\\"complete\\\")} - -by_repo = {} -for d in docs: - key = d[\\\"key\\\"] - repo = key.split(\\\":\\\")[0] if \\\":\\\" in key else \\\"unknown\\\" - by_repo.setdefault(repo, []).append(key) - -in_progress = {r: files for r, files in by_repo.items() if r not in complete_repos} - -if not in_progress: - print(\\\"No repos currently being classified.\\\") - print(f\\\"\\\\nTotal classified: {len(docs)} workflows across {len(by_repo)} repos\\\") -else: - for repo, files in sorted(in_progress.items()): - print(f\\\"⏳ {repo}: {len(files)} classified so far\\\") - recent = sorted(files)[-15:] - for f in recent: - wf_name = f.split(\\\":\\\", 1)[1] if \\\":\\\" in f else f - print(f\\\" {wf_name}\\\") - if len(files) > 15: - print(f\\\" ... 
and {len(files) - 15} more\\\") - print(f\\\"\\\\nTotal classified: {len(docs)} workflows\\\") -\"'" C-m - -# ── Pane 3 (middle-right): Agent 2 security findings summary ── -tmux select-pane -t 1 -tmux split-window -v -tmux send-keys "watch -n 5 'echo \"=== Agent 2: Security Findings ===\"; echo; \ -curl -s \"${COUCH_URL}/${DB}/_find\" \ - -H \"Content-Type: application/json\" \ - -d \"{\\\"selector\\\":{\\\"namespace\\\":\\\"ci-security:${OWNER}\\\"},\\\"fields\\\":[\\\"key\\\",\\\"value\\\"],\\\"limit\\\":9999}\" \ -| python3 -c \" -import sys, json -docs = json.load(sys.stdin)[\\\"docs\\\"] - -if not docs: - print(\\\"No security data yet.\\\") - print(\\\"Run Agent 2 after Agent 1 completes.\\\") -else: - finding_docs = [d for d in docs if d[\\\"key\\\"].startswith(\\\"findings:\\\")] - meta_docs = [d for d in docs if d[\\\"key\\\"].startswith(\\\"latest_\\\")] - - repos_with_findings = 0 - repos_clean = 0 - total_findings = 0 - severity_counts = {} - check_counts = {} - - for d in finding_docs: - findings = d.get(\\\"value\\\", []) - if isinstance(findings, list): - if findings: - repos_with_findings += 1 - total_findings += len(findings) - else: - repos_clean += 1 - for f in findings: - if isinstance(f, dict): - sev = f.get(\\\"severity\\\", \\\"?\\\") - severity_counts[sev] = severity_counts.get(sev, 0) + 1 - chk = f.get(\\\"check\\\", \\\"?\\\") - check_counts[chk] = check_counts.get(chk, 0) + 1 - - print(f\\\"Repos processed: {len(finding_docs)} ({repos_with_findings} with findings, {repos_clean} clean)\\\") - print(f\\\"Total findings: {total_findings}\\\") - print() - - if severity_counts: - print(\\\"By severity:\\\") - for sev in [\\\"CRITICAL\\\", \\\"HIGH\\\", \\\"MEDIUM\\\", \\\"LOW\\\", \\\"INFO\\\"]: - c = severity_counts.get(sev, 0) - if c > 0: - print(f\\\" {sev}: {c}\\\") - print() - - if check_counts: - print(\\\"By check:\\\") - for chk, c in sorted(check_counts.items(), key=lambda x: -x[1])[:10]: - print(f\\\" {c}\\t{chk}\\\") - - if 
meta_docs: - print() - print(\\\"Reports: \\\", \\\", \\\".join(d[\\\"key\\\"] for d in meta_docs)) -\"'" C-m - -# ── Pane 4 (bottom): Docker logs tail ── -tmux select-pane -t 2 -tmux split-window -v -tmux send-keys "docker compose logs -f --tail=100 api 2>&1 | grep --line-buffered -E '(Scanning|Progress|Rate limit|WARNING|ERROR|Scan complete|classified|cached|Preflight|Security scan|findings|composite|call_llm)'" C-m - -# Layout and attach -tmux select-layout -t "$SESSION" tiled -tmux attach -t "$SESSION" \ No newline at end of file diff --git a/repos/apache/github-review/report.md b/repos/apache/github-review/report.md deleted file mode 100644 index 27e375a..0000000 --- a/repos/apache/github-review/report.md +++ /dev/null @@ -1,203 +0,0 @@ -# CI Registry Publishing Analysis: apache - -## Contents - -- [Executive Summary](#executive-summary) -- [Package Ecosystem Distribution](#package-ecosystem-distribution-releases-snapshots-only) -- [Release Artifact Workflows](#release-artifact-workflows) (5) -- [Snapshot / Nightly Workflows](#snapshot-nightly-artifact-workflows) (1) -- [CI Infrastructure Workflows](#ci-infrastructure-image-workflows) (24) -- [Documentation Workflows](#documentation-website-workflows) (6) -- [Security: Low Risk](#security-low-risk-findings) (11) -- [Detailed Results](#detailed-results-release-snapshot-workflows) - - [apache/airflow](#apacheairflow) - - [apache/kafka](#apachekafka) - - [apache/spark](#apachespark) - ---- - -Scanned **3** repositories, **3** had GitHub Actions workflow files, **110** total workflows analyzed. 
- -## Executive Summary - -| Metric | Value | -|--------|-------| -| Repositories scanned | 3 | -| Repositories with workflows | 3 | -| Total workflow files | 110 | -| **Repos with any publishing** | **3** | -| Release artifact workflows | 5 | -| Snapshot / nightly workflows | 1 | -| CI infrastructure image workflows | 24 | -| Documentation / website workflows | 6 | -| Security notes flagged | 14 | - -## Package Ecosystem Distribution (releases + snapshots only) - -| Ecosystem | Workflows | Percentage | -|-----------|-----------|------------| -| docker_hub | 4 | 44.4% | -| maven_central | 2 | 22.2% | -| ghcr | 1 | 11.1% | -| apache_dist | 1 | 11.1% | -| pypi | 1 | 11.1% | - -## Release Artifact Workflows - -These workflows publish versioned packages to public registries consumed by end users. - -| Repository | Workflow | Ecosystems | Trigger | Auth | -|------------|----------|------------|---------|------| -| airflow | `release_dockerhub_image.yml` | docker_hub | workflow_dispatch with airflowVersion input (e.g. 3.0.1, 3.0.1rc1, 3.0.1b1) | DOCKERHUB_USER and DOCKERHUB_TOKEN secrets | -| airflow | `release_single_dockerhub_image.yml` | docker_hub, ghcr | workflow_call | DOCKERHUB_USER/DOCKERHUB_TOKEN secrets for Docker Hub, GITHUB_TOKEN for GHCR | -| kafka | `docker_promote.yml` | docker_hub | workflow_dispatch | secrets.DOCKERHUB_USER and secrets.DOCKERHUB_TOKEN | -| kafka | `docker_rc_release.yml` | docker_hub | workflow_dispatch | secrets.DOCKERHUB_USER and secrets.DOCKERHUB_TOKEN | -| spark | `release.yml` | apache_dist, maven_central, pypi | workflow_dispatch with inputs for branch, release-version, rc-count, and finalize; also scheduled cron | ASF credentials (ASF_USERNAME, ASF_PASSWORD, ASF_NEXUS_TOKEN), GPG key signing (GPG_PRIVATE_KEY, GPG_PASSPHRASE), PyPI API token (PYPI_API_TOKEN) | - -## Snapshot / Nightly Artifact Workflows - -These workflows publish snapshot or nightly builds to staging registries. 
- -| Repository | Workflow | Ecosystems | Trigger | Auth | -|------------|----------|------------|---------|------| -| spark | `publish_snapshot.yml` | maven_central | schedule (daily cron) and workflow_dispatch | ASF Nexus credentials (NEXUS_USER, NEXUS_PW, NEXUS_TOKEN) stored in GitHub secrets | - -## CI Infrastructure Image Workflows - -These workflows push Docker images used only for CI build caching, test execution, or build acceleration. They do not publish end-user artifacts. - -<details> -<summary>Show 24 CI infrastructure workflows</summary> - -| Repository | Workflow | Target | Summary | -|------------|----------|--------|---------| -| airflow | `additional-ci-image-checks.yml` | ghcr | This workflow pushes early BuildX cache images to GitHub Container Registry (GHC | -| airflow | `ci-image-build.yml` | ghcr | This workflow builds CI Docker images for Apache Airflow and conditionally pushe | -| airflow | `finalize-tests.yml` | ghcr | This workflow finalizes test runs by updating constraints and pushing Docker bui | -| airflow | `prod-image-build.yml` | ghcr | This workflow builds Apache Airflow production Docker images for CI/CD purposes. 
| -| airflow | `push-image-cache.yml` | ghcr | This workflow pushes CI and PROD Docker image caches to GitHub Container Registr | -| spark | `build_and_test.yml` | ghcr | This workflow builds and pushes Docker images to GitHub Container Registry (GHCR | -| spark | `build_branch35.yml` | ghcr | This workflow calls a reusable workflow (build_and_test.yml) with packages:write | -| spark | `build_branch40.yml` | ghcr | This workflow is a scheduled build job that calls a reusable workflow (build_and | -| spark | `build_branch40_java21.yml` | ghcr | This workflow is a scheduled CI build that runs every 2 days for Apache Spark's | -| spark | `build_branch40_python_pypy3.10.yml` | ghcr | This workflow calls a reusable workflow (build_and_test.yml) with packages:write | -| spark | `build_branch41.yml` | ghcr | Scheduled nightly build workflow for Apache Spark branch-4.1 that calls a reusab | -| spark | `build_branch41_java21.yml` | ghcr | This workflow is a scheduled nightly build that calls a reusable workflow (build | -| spark | `build_branch41_python_pypy3.10.yml` | ghcr | Scheduled workflow that calls a reusable workflow (build_and_test.yml) with pack | -| spark | `build_infra_images_cache.yml` | ghcr | Builds and pushes Docker images to GHCR for CI/CD infrastructure. 
Multiple test | -| spark | `build_java21.yml` | ghcr | This workflow is a scheduled nightly build that calls a reusable workflow (build | -| spark | `build_java25.yml` | ghcr | This workflow is a scheduled nightly build job that tests Apache Spark with Java | -| spark | `build_main.yml` | ghcr | This workflow calls a reusable workflow (build_and_test.yml) with packages:write | -| spark | `build_python_3.10.yml` | ghcr | This workflow calls a reusable workflow (build_and_test.yml) with packages:write | -| spark | `build_python_3.11.yml` | ghcr | This workflow calls a reusable workflow (build_and_test.yml) with packages:write | -| spark | `build_python_3.12_classic_only.yml` | ghcr | This workflow calls a reusable workflow (build_and_test.yml) with packages:write | -| spark | `build_python_3.12_pandas_3.yml` | ghcr | This workflow is a scheduled nightly build that calls a reusable workflow (build | -| spark | `build_python_3.13.yml` | ghcr | This workflow calls a reusable workflow (build_and_test.yml) with packages:write | -| spark | `build_python_3.14.yml` | ghcr | This workflow calls a reusable workflow (build_and_test.yml) with packages:write | -| spark | `build_python_3.14_nogil.yml` | ghcr | This workflow calls a reusable workflow (build_and_test.yml) with packages:write | - -</details> - -## Documentation / Website Workflows - -<details> -<summary>Show 6 documentation workflows</summary> - -| Repository | Workflow | Target | Summary | -|------------|----------|--------|---------| -| airflow | `ci-image-checks.yml` | s3 | This workflow builds Apache Airflow documentation and publishes it to AWS S3 (s3 | -| airflow | `publish-docs-to-s3.yml` | s3 | This workflow builds Apache Airflow documentation and publishes it to AWS S3 buc | -| airflow | `registry-backfill.yml` | s3 | This workflow backfills Apache Airflow provider registry documentation to S3 buc | -| airflow | `registry-build.yml` | s3 | Builds and publishes Apache Airflow provider registry documentation 
to S3. Extra | -| spark | `build_coverage.yml` | codecov | This workflow runs Python coverage tests on a schedule and uploads results to Co | -| spark | `pages.yml` | github_pages | Builds Apache Spark documentation using Jekyll, Sphinx, and other tools, then de | - -</details> - -## Security: Low Risk Findings - -GitHub-controlled values used directly in `run:` blocks. Not user-injectable but poor practice. - -<details> -<summary>Show 11 low-risk findings</summary> - -- **apache/airflow** (`prod-image-build.yml`): [LOW] Direct interpolation of github.sha in run block at step 'Build PROD images w/ source providers'. While github.sha is GitHub-controlled and not user-injectable, best practice is to pass through env block. -- **apache/airflow** (`publish-docs-to-s3.yml`): [LOW] GitHub-controlled value github.actor used directly in env blocks -- **apache/airflow** (`publish-docs-to-s3.yml`): [LOW] GitHub-controlled value github.repository used directly in env blocks -- **apache/airflow** (`registry-build.yml`): [LOW] GitHub-controlled value github.event.sender.login used in conditional expression -- **apache/airflow** (`release_dockerhub_image.yml`): [LOW] GitHub-controlled value github.event.inputs.airflowVersion used in concurrency.group -- **apache/airflow** (`release_dockerhub_image.yml`): [LOW] Input parameter airflowVersion passed through environment variables and shell scripts in build-info job -- **apache/airflow** (`release_single_dockerhub_image.yml`): [LOW] GitHub-controlled value github.sha used directly in env block COMMIT_SHA -- **apache/airflow** (`release_single_dockerhub_image.yml`): [LOW] GitHub-controlled value github.repository used directly in env block REPOSITORY -- **apache/airflow** (`release_single_dockerhub_image.yml`): [LOW] GitHub-controlled value github.actor used in docker login command via ACTOR env variable -- **apache/spark** (`publish_snapshot.yml`): [LOW] GitHub-controlled value matrix.branch used in checkout ref and GIT_REF 
environment variable -- **apache/spark** (`release.yml`): [LOW] GitHub-controlled value github.actor used directly in GIT_NAME environment variable - -</details> - -## Detailed Results: Release & Snapshot Workflows - -### apache/airflow - -**2** release/snapshot workflows | Ecosystems: **docker_hub, ghcr** | Release Artifacts: 2 - -**`release_dockerhub_image.yml`** — Release PROD images [Release Artifacts] -- **Summary**: This workflow publishes production Apache Airflow Docker images to Docker Hub. It is manually triggered with an Airflow version parameter (supporting release, RC, and beta versions). The workflow builds images for multiple Python versions and platforms (amd64 and optionally arm64), then delegates to a reusable workflow (release_single_dockerhub_image.yml) that performs the actual Docker Hub publishing. Access is restricted to a whitelist of Apache Airflow committers. -- **Ecosystems**: docker_hub -- **Trigger**: workflow_dispatch with airflowVersion input (e.g. 3.0.1, 3.0.1rc1, 3.0.1b1) -- **Auth**: DOCKERHUB_USER and DOCKERHUB_TOKEN secrets -- **Confidence**: high - -**`release_single_dockerhub_image.yml`** — Release single PROD image [Release Artifacts] -- **Summary**: Builds and publishes versioned Apache Airflow production Docker images to Docker Hub for multiple platforms (linux/amd64, linux/arm64) and Python versions. The workflow builds both regular and slim images, verifies them, then merges multi-platform manifests. Images are tagged with specific Airflow versions (e.g., 3.0.1, 3.0.1rc1) and optionally as 'latest'. Also logs into GHCR for intermediate operations. 
-- **Ecosystems**: docker_hub, ghcr -- **Trigger**: workflow_call -- **Auth**: DOCKERHUB_USER/DOCKERHUB_TOKEN secrets for Docker Hub, GITHUB_TOKEN for GHCR -- **Confidence**: high -- **Commands**: `breeze release-management release-prod-images`, `breeze release-management merge-prod-images` - -### apache/kafka - -**2** release/snapshot workflows | Ecosystems: **docker_hub** | Release Artifacts: 2 - -**`docker_promote.yml`** — Promote Release Candidate Docker Image [Release Artifacts] -- **Summary**: This workflow promotes Apache Kafka release candidate Docker images to final release versions on Docker Hub. It uses workflow_dispatch to manually trigger promotion, taking RC image names (e.g., apache/kafka:3.8.0-rc0) and promoted image names (e.g., apache/kafka:3.8.0) as inputs. The workflow authenticates to Docker Hub and uses docker buildx imagetools to copy/tag the RC image as the promoted release image. User inputs are safely passed through env variables before being [...] -- **Ecosystems**: docker_hub -- **Trigger**: workflow_dispatch -- **Auth**: secrets.DOCKERHUB_USER and secrets.DOCKERHUB_TOKEN -- **Confidence**: high -- **GitHub Actions**: `docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef` -- **Commands**: `docker buildx imagetools create --tag $PROMOTED_DOCKER_IMAGE $RC_DOCKER_IMAGE` - -**`docker_rc_release.yml`** — Build and Push Release Candidate Docker Image [Release Artifacts] -- **Summary**: This workflow builds and publishes Apache Kafka release candidate Docker images to Docker Hub. It supports both JVM and native image types, is manually triggered via workflow_dispatch, and uses a Python script (docker_release.py) to build and push multi-architecture images (via QEMU and Docker Buildx) to apache/kafka or apache/kafka-native repositories on Docker Hub. 
-- **Ecosystems**: docker_hub
-- **Trigger**: workflow_dispatch
-- **Auth**: secrets.DOCKERHUB_USER and secrets.DOCKERHUB_TOKEN
-- **Confidence**: high
-- **GitHub Actions**: `docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef`
-- **Commands**: `python docker/docker_release.py $RC_DOCKER_IMAGE --kafka-url $KAFKA_URL --image-type $IMAGE_TYPE`
-
-### apache/spark
-
-**2** release/snapshot workflows | Ecosystems: **apache_dist, maven_central, pypi** | Release Artifacts: 1, Snapshot / Nightly Artifacts: 1
-
-**`release.yml`** — Release Apache Spark [Release Artifacts]
-- **Summary**: This workflow orchestrates the Apache Spark release process, publishing release artifacts to Apache Distribution SVN (apache_dist), Maven Central (via ASF Nexus), and PyPI. It supports both RC creation and finalization modes. The workflow calls dev/create-release/do-release-docker.sh which handles the actual publishing. It includes dry-run capability and is designed to run in forked repositories with manual dispatch. The finalize mode converts RC artifacts to official rele [...]
-- **Ecosystems**: apache_dist, maven_central, pypi
-- **Trigger**: workflow_dispatch with inputs for branch, release-version, rc-count, and finalize; also scheduled cron
-- **Auth**: ASF credentials (ASF_USERNAME, ASF_PASSWORD, ASF_NEXUS_TOKEN), GPG key signing (GPG_PRIVATE_KEY, GPG_PASSPHRASE), PyPI API token (PYPI_API_TOKEN)
-- **Confidence**: high
-- **Commands**: `dev/create-release/do-release-docker.sh`
-
-**`publish_snapshot.yml`** — Publish snapshot [Snapshot / Nightly Artifacts]
-- **Summary**: Publishes Apache Spark snapshot builds to ASF Nexus repository on a daily schedule for multiple branches (master, branch-4.1, branch-4.0, branch-3.5). Uses Maven to build and deploy snapshot artifacts with ASF Nexus authentication.
-- **Ecosystems**: maven_central
-- **Trigger**: schedule (daily cron) and workflow_dispatch
-- **Auth**: ASF Nexus credentials (NEXUS_USER, NEXUS_PW, NEXUS_TOKEN) stored in GitHub secrets
-- **Confidence**: high
-- **Commands**: `./dev/create-release/release-build.sh publish-snapshot`
-
----
-
-*Cached in `ci-classification:apache`. Set `clear_cache` to `true` to force a fresh scan. Raw YAML stored in `ci-workflows:apache`.*
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
