This is an automated email from the ASF dual-hosted git repository.
akm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-agents.git
The following commit(s) were added to refs/heads/main by this push:
new f8cba56 Minor fixes
f8cba56 is described below
commit f8cba56e0e45ef5f2dcc2d1040ace76b3e0b3f83
Author: Andrew Musselman <[email protected]>
AuthorDate: Thu Apr 2 20:24:48 2026 -0700
Minor fixes
---
repos/apache/github-review/agents/summary.py | 202 +++++++++++++++------------
1 file changed, 116 insertions(+), 86 deletions(-)
diff --git a/repos/apache/github-review/agents/summary.py
b/repos/apache/github-review/agents/summary.py
index abbee50..0455785 100644
--- a/repos/apache/github-review/agents/summary.py
+++ b/repos/apache/github-review/agents/summary.py
@@ -25,11 +25,9 @@ async def run(input_dict, tools):
print(f"Security report: {len(sec_report or '')} chars", flush=True)
# --- Parse per-repo ecosystems from publishing report text ---
- # Matches lines like: ### apache/iggy\n**4** ... | Ecosystems: **crates_io, docker_hub** | ...
repo_ecosystems = {}
- repo_categories = {} # repo -> {release: N, snapshot: N}
+ repo_categories = {}
if pub_report:
- # Match detailed results headers
header_pattern = re.compile(
r'### ' + re.escape(f'{owner}/') + r'(\S+)\s*\n+'
r'\*\*(\d+)\*\* release/snapshot workflows \| Ecosystems:
\*\*([^*]+)\*\*'
@@ -53,8 +51,7 @@ async def run(input_dict, tools):
all_sec_keys = security_ns.list_keys()
finding_keys = [k for k in all_sec_keys if k.startswith("findings:")]
- repo_security = {} # repo -> {severities, total, worst, top_checks}
- SEV_ORDER = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3, "INFO": 4}
+ repo_security = {}
for k in finding_keys:
repo = k.replace("findings:", "")
@@ -68,7 +65,6 @@ async def run(input_dict, tools):
sev = f.get("severity", "INFO")
sev_counts[sev] = sev_counts.get(sev, 0) + 1
chk = f.get("check", "unknown")
- # Skip info-level noise for top checks
if sev != "INFO":
check_counts[chk] = check_counts.get(chk, 0) + 1
@@ -78,7 +74,6 @@ async def run(input_dict, tools):
worst = s
break
- # Top 3 non-INFO checks by count
top_checks = sorted(check_counts.items(), key=lambda x: -x[1])[:3]
repo_security[repo] = {
@@ -91,7 +86,6 @@ async def run(input_dict, tools):
print(f"Security data for {len(repo_security)} repos", flush=True)
# --- Parse trusted publishing opportunities ---
- # Which repos have TP opportunities (using long-lived tokens where OIDC is available)
tp_repos = set()
tp_opportunities = pub_stats.get("trusted_publishing_opportunities",
[])
if isinstance(tp_opportunities, list):
@@ -100,7 +94,6 @@ async def run(input_dict, tools):
tp_repos.add(opp.get("repo", ""))
elif isinstance(opp, str):
tp_repos.add(opp)
- # Also try parsing from report text for reliability
if pub_report:
tp_section = False
for line in pub_report.split("\n"):
@@ -118,14 +111,6 @@ async def run(input_dict, tools):
print(f"Trusted publishing opportunity repos: {len(tp_repos)}",
flush=True)
- # --- Identify repos already using OIDC ---
- oidc_repos = set()
- if pub_report:
- for line in pub_report.split("\n"):
- if "OIDC" in line and ("trusted publishing" in line.lower() or "id-token" in line.lower()):
- # Find which repo section we're in
- pass # Complex to parse; skip for now
-
# --- Build combined risk table ---
publishing_repos = set(pub_stats.get("publishing_repos", []))
all_repos = publishing_repos | set(repo_security.keys())
@@ -142,7 +127,6 @@ async def run(input_dict, tools):
publishes = repo in publishing_repos
has_tp_opportunity = repo in tp_repos
- # Risk score for sorting: publishing breadth * security severity
eco_score = len(ecosystems) if ecosystems else (1 if publishes
else 0)
sev_score = {"CRITICAL": 100, "HIGH": 50, "MEDIUM": 10, "LOW": 3,
"INFO": 1, "—": 0}.get(worst, 0)
risk_score = eco_score * sev_score + total
@@ -165,6 +149,8 @@ async def run(input_dict, tools):
# --- Classify into tiers ---
critical_repos = [r for r in repo_rows if r["worst"] == "CRITICAL"]
high_repos = [r for r in repo_rows if r["worst"] == "HIGH"]
+ high_publishing = [r for r in high_repos if r["publishes"]]
+ high_nonpublishing = [r for r in high_repos if not r["publishes"]]
medium_repos = [r for r in repo_rows if r["worst"] == "MEDIUM" and
r["publishes"]]
low_repos = [r for r in repo_rows if r["worst"] in ("LOW", "INFO",
"—") and r["publishes"]]
@@ -203,6 +189,11 @@ async def run(input_dict, tools):
return ""
return ", ".join(f"{chk} ({n})" for chk, n in top_checks)
+ def plural(n, singular, plural_form=None):
+ if plural_form is None:
+ plural_form = singular + "s"
+ return singular if n == 1 else plural_form
+
lines = []
lines.append(f"# Apache GitHub Review: Combined Risk Assessment\n")
lines.append(f"Cross-referencing CI publishing analysis with security
scan results "
@@ -229,24 +220,27 @@ async def run(input_dict, tools):
lines.append(f"| Repos publishing to registries |
{len(publishing_repos)} |")
lines.append(f"| Total security findings |
{sec_stats.get('total_findings', '?')} |")
sev = sec_stats.get("severity_counts", {})
- lines.append(f"| CRITICAL findings | {sev.get('CRITICAL', 0)} |")
- lines.append(f"| HIGH findings | {sev.get('HIGH', 0)} |")
- lines.append(f"| Repos needing trusted publishing migration |
{len(tp_repos)} |")
+ if sev.get("CRITICAL", 0):
+ lines.append(f"| CRITICAL findings | {sev.get('CRITICAL', 0)} |")
+ if sev.get("HIGH", 0):
+ lines.append(f"| HIGH findings | {sev.get('HIGH', 0)} |")
+ if tp_repos:
+ lines.append(f"| Repos needing trusted publishing migration |
{len(tp_repos)} |")
eco = pub_stats.get("ecosystem_counts", {})
- # Filter out documentation/CI targets for the publishing risk summary
- doc_targets = {"codecov", "github_pages", "surge_sh", "s3", "gcr"}
+ doc_targets = {"codecov", "github_pages", "surge_sh", "s3", "gcr",
"ghcr"}
release_eco = {k: v for k, v in eco.items() if k not in doc_targets}
top_eco = sorted(release_eco.items(), key=lambda x: -x[1])[:5]
- eco_summary = ", ".join(f"{e} ({c})" for e, c in top_eco)
+ eco_summary = ", ".join(f"{e} ({c})" for e, c in top_eco) if top_eco
else "none"
lines.append(f"| Top ecosystems | {eco_summary} |")
lines.append("")
- # --- CRITICAL + HIGH tier ---
- if critical_repos or high_repos:
+ # --- CRITICAL + HIGH publishing tier ---
+ immediate = critical_repos + high_publishing
+ if immediate:
lines.append("## Immediate Attention Required\n")
lines.append("Repos with CRITICAL or HIGH security findings that
also publish packages.\n")
- for r in critical_repos + high_repos:
+ for r in immediate:
repo = r["repo"]
lines.append(f"### {owner}/{repo}\n")
@@ -278,7 +272,27 @@ async def run(input_dict, tools):
details.append(f"**Details:** {repo_pub_link(repo)} ·
{repo_sec_link(repo)}")
- # Join with double-space + newline for markdown line breaks
+ lines.append(" \n".join(details))
+ lines.append("")
+
+ # --- HIGH non-publishing repos ---
+ if high_nonpublishing:
+ lines.append("## Non-Publishing Repos with HIGH Findings\n")
+ lines.append("These repos do not publish packages but have
HIGH-severity security findings "
+ "in their CI workflows.\n")
+
+ for r in high_nonpublishing:
+ repo = r["repo"]
+ lines.append(f"### {owner}/{repo}\n")
+
+ details = []
+ details.append(f"**Security:** {r['total']} findings —
{sev_summary(r['sev_counts'])}")
+
+ if r["top_checks"]:
+ details.append(f"**Top issues:**
{check_summary(r['top_checks'])}")
+
+ details.append(f"**Details:** {repo_sec_link(repo)}")
+
lines.append(" \n".join(details))
lines.append("")
@@ -291,7 +305,7 @@ async def run(input_dict, tools):
for r in medium_repos:
repo = r["repo"]
- eco = eco_str(r["ecosystems"]) if r["ecosystems"] else "npm"
+ eco = eco_str(r["ecosystems"]) if r["ecosystems"] else "—"
top = r["top_checks"][0][0] if r["top_checks"] else
"unpinned_actions"
tp = "migrate" if r["has_tp"] else "—"
links = f"{repo_pub_link(repo)} · {repo_sec_link(repo)}"
@@ -301,10 +315,11 @@ async def run(input_dict, tools):
# --- LOW tier summary ---
if low_repos:
+ n_low = len(low_repos)
lines.append("## Low Risk: Publishing Repos\n")
- lines.append(f"{len(low_repos)} repos publish packages with only
LOW/INFO-level security findings "
- f"(missing CODEOWNERS, no dependabot config).\n")
- lines.append(f"<details>\n<summary>Show {len(low_repos)}
repos</summary>\n")
+ lines.append(f"{n_low} {plural(n_low, 'repo')} {plural(n_low,
'publishes', 'publish')} packages with only LOW/INFO-level "
+ f"security findings (missing CODEOWNERS, no
dependabot config).\n")
+ lines.append(f"<details>\n<summary>Show {n_low} {plural(n_low,
'repo')}</summary>\n")
for r in low_repos:
repo = r["repo"]
eco = eco_str(r["ecosystems"]) if r["ecosystems"] else "—"
@@ -312,38 +327,46 @@ async def run(input_dict, tools):
f"({repo_pub_link(repo)} ·
{repo_sec_link(repo)})")
lines.append(f"\n</details>\n")
- # --- Trusted Publishing summary ---
- lines.append("## Trusted Publishing Opportunities\n")
- lines.append(f"**{len(tp_repos)}** repos use long-lived tokens to
publish to ecosystems that support "
- f"OIDC trusted publishing. Migrating eliminates stored
secrets.\n")
- lines.append(f"Full details: [{PUB} → Trusted Publishing]"
- f"({PUB}#trusted-publishing-migration-opportunities)\n")
-
- # Group by ecosystem from pub_report
- tp_ecosystems = {}
- if pub_report:
- current_eco = None
- for line in pub_report.split("\n"):
- if line.startswith("### ") and "Trusted Publishing" not in
line:
- # Check if this is an ecosystem header inside TP section
- eco_name = line[4:].strip()
- if eco_name in ("crates.io", "npm", "NuGet", "PyPI",
"RubyGems"):
- current_eco = eco_name
+ # --- Trusted Publishing summary (skip if none) ---
+ if tp_repos:
+ n_tp = len(tp_repos)
+ lines.append("## Trusted Publishing Opportunities\n")
+ lines.append(f"**{n_tp}** {plural(n_tp, 'repo')} {plural(n_tp,
'uses', 'use')} long-lived tokens to publish to ecosystems "
+ f"that support OIDC trusted publishing. Migrating
eliminates stored secrets.\n")
+ lines.append(f"Full details: [{PUB} → Trusted Publishing]"
+
f"({PUB}#trusted-publishing-migration-opportunities)\n")
+
+ tp_ecosystems = {}
+ if pub_report:
+ current_eco = None
+ for line in pub_report.split("\n"):
+ if line.startswith("### ") and "Trusted Publishing" not in
line:
+ eco_name = line[4:].strip()
+ if eco_name in ("crates.io", "npm", "NuGet", "PyPI",
"RubyGems"):
+ current_eco = eco_name
+ continue
+ if current_eco and line.startswith("## "):
+ current_eco = None
continue
- if current_eco and line.startswith("## "):
- current_eco = None
- continue
- if current_eco and "| " in line and "`" in line:
- parts = [p.strip() for p in line.split("|")]
- if len(parts) > 2 and parts[1] and parts[1] !=
"Repository":
- tp_ecosystems.setdefault(current_eco,
[]).append(parts[1])
-
- for eco, repos in sorted(tp_ecosystems.items()):
- unique = sorted(set(repos))
- lines.append(f"- **{eco}**: {', '.join(unique)}")
- lines.append("")
+ if current_eco and "| " in line and "`" in line:
+ parts = [p.strip() for p in line.split("|")]
+ if len(parts) > 2 and parts[1] and parts[1] !=
"Repository":
+ tp_ecosystems.setdefault(current_eco,
[]).append(parts[1])
+
+ for eco, repos in sorted(tp_ecosystems.items()):
+ unique = sorted(set(repos))
+ lines.append(f"- **{eco}**: {', '.join(unique)}")
+ lines.append("")
- # --- Key recommendations ---
+ # --- Summary when nothing in the intersection ---
+ if not publishing_repos:
+ lines.append("## Summary\n")
+ lines.append(f"None of the {pub_stats.get('repos_scanned', '?')}
scanned repos publish packages to "
+ f"registries. Security findings are limited to
general CI hygiene "
+ f"(unpinned actions, missing CODEOWNERS). See the "
+ f"[security report]({SEC}) for details.\n")
+
+ # --- Key recommendations (only include items with nonzero counts) ---
lines.append("## Key Recommendations\n")
rec_num = 1
@@ -352,43 +375,50 @@ async def run(input_dict, tools):
crit_names = ", ".join("`" + r["repo"] + "`" for r in
critical_repos)
verb = "has" if len(critical_repos) == 1 else "have"
lines.append(f"{rec_num}. **Fix CRITICAL findings immediately.** "
- f"{crit_names} {verb} "
- f"exploitable vulnerabilities in publishing
workflows.")
+ f"{crit_names} {verb} exploitable CI vulnerabilities "
+ f"([details]({SEC})).")
rec_num += 1
- lines.append(f"{rec_num}. **Migrate to trusted publishing.** "
- f"{len(tp_repos)} repos can eliminate long-lived secrets
by adopting OIDC. "
- f"Start with repos publishing to PyPI and npm — "
- f"[migration
guide]({PUB}#trusted-publishing-migration-opportunities).")
- rec_num += 1
+ if tp_repos:
+ n_tp = len(tp_repos)
+ lines.append(f"{rec_num}. **Migrate to trusted publishing.** "
+ f"{n_tp} {plural(n_tp, 'repo')} can eliminate
long-lived secrets by adopting OIDC. "
+ f"Start with repos publishing to PyPI and npm — "
+ f"[migration
guide]({PUB}#trusted-publishing-migration-opportunities).")
+ rec_num += 1
if high_repos:
- # Count all repos that have any HIGH findings (not just worst=HIGH)
repos_with_high = [r for r in repo_rows
if r["sev_counts"].get("HIGH", 0) > 0]
+ n_high = len(repos_with_high)
lines.append(f"{rec_num}. **Review composite action injection
patterns.** "
- f"{len(repos_with_high)} repos have HIGH findings
from `inputs.*` directly interpolated "
+ f"{n_high} {plural(n_high, 'repo')} {plural(n_high,
'has', 'have')} "
+ f"HIGH findings from `inputs.*` directly interpolated
"
f"in composite action run blocks. While these are
called from trusted contexts today, "
f"they create hidden injection surfaces.")
rec_num += 1
- lines.append(f"{rec_num}. **Pin actions to SHA hashes.** "
- f"All {sec_stats.get('repos_with_findings', '?')} repos
use mutable tag refs. "
- f"See the [unpinned actions
findings]({SEC}#medium-findings) for per-repo counts.")
- rec_num += 1
-
- # Count repos with missing_codeowners or codeowners_gap findings
- no_codeowners = 0
- for repo, sec in repo_security.items():
- for chk, cnt in sec.get("top_checks", []):
- pass # top_checks doesn't have all checks
- # Count from check_counts in sec_stats
+ repos_with_findings = sec_stats.get("repos_with_findings", 0)
+ if repos_with_findings:
+ n_repos = repos_with_findings
+ lines.append(f"{rec_num}. **Pin actions to SHA hashes.** "
+ f"{'The scanned repo uses' if n_repos == 1 else f'All
{n_repos} repos use'} "
+ f"mutable tag refs. "
+ f"See the [unpinned actions
findings]({SEC}#medium-findings) for per-repo counts.")
+ rec_num += 1
+
codeowners_missing = sec_stats.get("check_counts",
{}).get("missing_codeowners", 0)
codeowners_gap = sec_stats.get("check_counts",
{}).get("codeowners_gap", 0)
- lines.append(f"{rec_num}. **Add CODEOWNERS with `.github/` coverage.**
"
- f"{codeowners_missing} repos have no CODEOWNERS file and "
- f"{codeowners_gap} have CODEOWNERS without `.github/`
rules. "
- f"Workflow changes can bypass security review.")
+ if codeowners_missing or codeowners_gap:
+ parts = []
+ if codeowners_missing:
+ parts.append(f"{codeowners_missing}
{plural(codeowners_missing, 'repo')} "
+ f"{plural(codeowners_missing, 'has', 'have')} no
CODEOWNERS file")
+ if codeowners_gap:
+ parts.append(f"{codeowners_gap} {plural(codeowners_gap, 'has',
'have')} CODEOWNERS "
+ f"without `.github/` rules")
+ lines.append(f"{rec_num}. **Add CODEOWNERS with `.github/`
coverage.** "
+ f"{' and '.join(parts)}. Workflow changes can bypass
security review.")
lines.append("")
lines.append("---\n")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]