(airflow-steward) branch main updated: add Privacy-LLM gate-check validator (#215)

choo121600 Sun, 24 May 2026 23:50:46 -0700

This is an automated email from the ASF dual-hosted git repository.

choo121600 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-steward.git



The following commit(s) were added to refs/heads/main by this push:
     new 7948591  add Privacy-LLM gate-check validator  (#215)
7948591 is described below

commit 794859175a125ed116fc08e17d201cfcf08efd71
Author: Justin Mclean <[email protected]>
AuthorDate: Mon May 25 16:50:34 2026 +1000

    add Privacy-LLM gate-check validator  (#215)
    
    * feat(privacy-llm): add Privacy-LLM gate-check validation
    
    * Update tools/skill-validator/src/skill_validator/__init__.py
    
    Co-authored-by: André Ahlert <[email protected]>
    
    ---------
    
    Co-authored-by: André Ahlert <[email protected]>
---
 .claude/skills/security-issue-deduplicate/SKILL.md |  11 +
 .claude/skills/security-issue-fix/SKILL.md         |  11 +
 .../src/skill_validator/__init__.py                | 199 +++++++++++++++++-
 tools/skill-validator/tests/test_validator.py      | 233 +++++++++++++++++++++
 4 files changed, 450 insertions(+), 4 deletions(-)

diff --git a/.claude/skills/security-issue-deduplicate/SKILL.md b/.claude/skills/security-issue-deduplicate/SKILL.md
index 716008a..2610a8b 100644
--- a/.claude/skills/security-issue-deduplicate/SKILL.md
+++ b/.claude/skills/security-issue-deduplicate/SKILL.md
@@ -162,6 +162,17 @@ in `docs/prerequisites.md`.
    `gh issue view <kept> --repo <tracker> --json number`
    and the same for `<dropped>` — before any write.
 3. `uv --version` returns.
+4. **Privacy-LLM gate-check** passes:
+
+   ```bash
+   uv run --project <framework>/tools/privacy-llm/checker \
+     privacy-llm-check
+   ```
+
+   This skill reads both tracker issue bodies in Step 1;
+   the redact-after-fetch protocol
+   (see [`tools/privacy-llm/wiring.md`](../../../tools/privacy-llm/wiring.md))
+   applies to those fetches.
 
 If any check fails, stop. A partial dedup (body merged but
 dropped tracker left open, or CVE JSON not regenerated) is worse
diff --git a/.claude/skills/security-issue-fix/SKILL.md b/.claude/skills/security-issue-fix/SKILL.md
index 2ff4b9f..bb5f8eb 100644
--- a/.claude/skills/security-issue-fix/SKILL.md
+++ b/.claude/skills/security-issue-fix/SKILL.md
@@ -201,6 +201,17 @@ continue.
    `breeze` is required for the area of the fix, also
    `breeze --version`. Any missing tool stops the skill;
    installing them mid-run is out of scope.
+6. **Privacy-LLM gate-check** passes:
+
+   ```bash
+   uv run --project <framework>/tools/privacy-llm/checker \
+     privacy-llm-check
+   ```
+
+   This skill reads the `<tracker>` issue body to update the
+   "PR with the fix" field; the redact-after-fetch protocol
+   (see [`tools/privacy-llm/wiring.md`](../../../tools/privacy-llm/wiring.md))
+   applies to that fetch.
 
 Only after **every** check is green, proceed to Step 1.
 
diff --git a/tools/skill-validator/src/skill_validator/__init__.py b/tools/skill-validator/src/skill_validator/__init__.py
index 23480e9..ee99270 100644
--- a/tools/skill-validator/src/skill_validator/__init__.py
+++ b/tools/skill-validator/src/skill_validator/__init__.py
@@ -68,9 +68,54 @@ PROJECTS_TEMPLATE_DIR = Path("projects/_template")
 REQUIRED_FRONTMATTER_KEYS = {"name", "description", "license"}
 OPTIONAL_FRONTMATTER_KEYS = {"when_to_use", "mode"}
 ALLOWED_LICENSES = {"Apache-2.0"}
-# MISSION mode taxonomy — see docs/modes.md.
-# "Auto-merge" deliberately excluded: it is off per MISSION sequencing.
-ALLOWED_MODES = {"Triage", "Mentoring", "Drafting", "Pairing"}
+
+
+def _read_mode_table() -> dict[str, str]:
+    """Read the canonical MISSION mode table from ``docs/modes.md``."""
+    starts = [Path.cwd().resolve(), Path(__file__).resolve().parent]
+    roots: list[Path] = []
+    for start in starts:
+        roots.extend([start, *start.parents])
+
+    rejected: list[str] = []
+    for root in roots:
+        modes_doc = root / DOCS_DIR / "modes.md"
+        if not modes_doc.is_file():
+            continue
+        text = modes_doc.read_text(encoding="utf-8")
+        if "## Modes at a glance" not in text:
+            rejected.append(f"{modes_doc}: missing '## Modes at a glance' 
section marker")
+            continue
+        modes_table = text.split("## Modes at a glance", 1)[1].split("## 
Triage", 1)[0]
+        modes: dict[str, str] = {}
+        for line in modes_table.splitlines():
+            if not line.startswith("| **"):
+                continue
+            cells = [cell.strip() for cell in line.strip("|").split("|")]
+            if len(cells) < 3:
+                continue
+            mode = cells[0].strip("*")
+            status = cells[2].strip()
+            if mode and status:
+                modes[mode] = status
+        if modes:
+            return modes
+        rejected.append(
+            f"{modes_doc}: found '## Modes at a glance' but parsed 0 modes "
+            f"(expected rows like '| **<Mode>** | … | <status> |')"
+        )
+
+    if rejected:
+        raise RuntimeError("could not parse mode taxonomy from docs/modes.md — 
" + "; ".join(rejected))
+    searched = dict.fromkeys(str(r / DOCS_DIR / "modes.md") for r in roots)
+    raise RuntimeError("could not locate docs/modes.md; searched: " + ", 
".join(searched))
+
+
+# MISSION mode taxonomy — docs/modes.md is canonical.
+_MODE_STATUS_BY_NAME = _read_mode_table()
+_MODE_TAXONOMY = set(_MODE_STATUS_BY_NAME)
+_OFF_MODES = {mode for mode, status in _MODE_STATUS_BY_NAME.items() if status 
== "off"}
+ALLOWED_MODES = _MODE_TAXONOMY - _OFF_MODES
 
 # Forbidden hardcoded project references (fixed strings, case-sensitive)
 FORBIDDEN_PATTERNS: list[str] = [
@@ -147,6 +192,7 @@ INJECTION_GUARD_TODO_CATEGORY = "injection_guard_todo"
 
 GH_LIST_CATEGORY = "gh_list_no_limit"
 SECURITY_PATTERN_CATEGORY = "security_pattern"
+PRIVACY_CATEGORY = "privacy"
 SOFT_CATEGORIES: frozenset[str] = frozenset(
     {
         PRINCIPLE_CATEGORY,
@@ -154,6 +200,7 @@ SOFT_CATEGORIES: frozenset[str] = frozenset(
         INJECTION_GUARD_TODO_CATEGORY,
         SECURITY_PATTERN_CATEGORY,
         GH_LIST_CATEGORY,
+        PRIVACY_CATEGORY,
     }
 )
 
@@ -220,6 +267,36 @@ _FIELD_PLACEHOLDER_RE = re.compile(
     r"(?:[\"'][^\"'\s]*<[^>]+>[^\"'\s]*[\"']|[^\s\"']*<[^>]+>[^\s\"']*)"
 )
 
+# ---------------------------------------------------------------------------
+# Privacy-LLM gate-check constants (write-skill/security-checklist.md § Pattern 6)
+# ---------------------------------------------------------------------------
+
+# Modes that can process external / attacker-controlled content and need the
+# Privacy-LLM gate when they read private tracker bodies.  Derived from
+# docs/modes.md taxonomy constants above: Pairing is intentionally excluded
+# because the human remains in the loop; Auto-merge is currently excluded only
+# because it is in _OFF_MODES.  When the first Auto-merge skill ships, remove
+# it from _OFF_MODES so body-reading Auto-merge skills are gated by default.
+_PRIVACY_EXTERNAL_CONTENT_MODES: frozenset[str] = frozenset(ALLOWED_MODES - 
{"Pairing"})
+
+_TRACKER_PLACEHOLDER = "<tracker>"
+_TRACKER_ISSUE_VIEW_RE = re.compile(r"\bgh\s+issue\s+view\b")
+_TRACKER_ISSUE_API_RE = 
re.compile(r"\bgh\s+api\s+/?repos/<tracker>/issues/[^\s`]+")
+_TRACKER_ISSUE_API_MUTATION_RE = 
re.compile(r"\s-X\s+(?:PATCH|POST|PUT|DELETE)\b")
+# TODO: detect body reads through ``gh api graphql`` and
+# ``gh issue list --json body`` once the validator has command parsing
+# rich enough to avoid broad prose false positives.
+_PRIVACY_LLM_GATE_PHRASE = "privacy-llm-check"
+_PRIVACY_GATE_SECTION_RE = re.compile(
+    r"^(?:"
+    r"prerequisites?(?:\b|$)"
+    r"|pre[- ]?flight(?:\b|$)"
+    r"|step\s*0(?:\b|$)"
+    r")",
+    re.IGNORECASE,
+)
+_ANTI_EXAMPLE_SECTION_RE = re.compile(r"\b(?:don'?t|anti[- 
]?example|bad|wrong)\b", re.IGNORECASE)
+
 ACTION_INVENTORY_COMMA_THRESHOLD = 5
 
 DISTINCT_FROM_RE = re.compile(
@@ -421,7 +498,7 @@ def extract_headings(text: str) -> set[str]:
 # or attacker-controlled content.
 _BODY_INLINE_RE = re.compile(r'--body[\s=]["\']')
 
-_FENCED_CODE_RE = re.compile(r"^```[\s\S]*?^```", re.MULTILINE)
+_FENCED_CODE_RE = re.compile(r"^ {0,3}```[\s\S]*?^ {0,3}```", re.MULTILINE)
 _DOUBLE_BACKTICK_RE = re.compile(r"``[\s\S]+?``")
 _SINGLE_BACKTICK_RE = re.compile(r"(?<!`)`(?!`)[\s\S]+?(?<!`)`(?!`)")
 
@@ -755,6 +832,118 @@ def validate_security_patterns(path: Path, text: str) -> Iterable[Violation]:
         )
 
 
+# ---------------------------------------------------------------------------
+# Privacy-LLM gate-check (write-skill/security-checklist.md § Pattern 6)
+# ---------------------------------------------------------------------------
+
+
+def _heading_text(raw: str) -> str:
+    text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", raw.strip())
+    text = text.strip("#").strip()
+    return text
+
+
+def _fenced_code_blocks(text: str) -> list[str]:
+    return [m.group(0) for m in _FENCED_CODE_RE.finditer(text)]
+
+
+def _fenced_code_blocks_in_privacy_gate_sections(text: str) -> list[str]:
+    """Return fenced code blocks inside Prerequisites / Preflight / Step 0 
sections."""
+    heading_re = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
+    headings = list(heading_re.finditer(text))
+    heading_index = 0
+    stack: list[tuple[int, str]] = []
+    blocks: list[str] = []
+
+    for block in _FENCED_CODE_RE.finditer(text):
+        while heading_index < len(headings) and 
headings[heading_index].start() < block.start():
+            heading = headings[heading_index]
+            level = len(heading.group(1))
+            title = _heading_text(heading.group(2))
+            stack = [(old_level, old_title) for old_level, old_title in stack 
if old_level < level]
+            stack.append((level, title))
+            heading_index += 1
+
+        titles = [title for _, title in stack]
+        if any(_ANTI_EXAMPLE_SECTION_RE.search(title) for title in titles):
+            continue
+        if any(_PRIVACY_GATE_SECTION_RE.search(title) for title in titles):
+            blocks.append(block.group(0))
+
+    return blocks
+
+
+def _shell_logical_lines(text: str) -> list[str]:
+    lines: list[str] = []
+    current: list[str] = []
+    for line in text.splitlines():
+        stripped = line.rstrip()
+        if stripped.endswith("\\"):
+            current.append(stripped[:-1].strip())
+            continue
+        if current:
+            current.append(stripped.strip())
+            lines.append(" ".join(part for part in current if part))
+            current = []
+        else:
+            lines.append(line)
+    if current:
+        lines.append(" ".join(part for part in current if part))
+    return lines
+
+
+def _has_tracker_body_read(text: str) -> bool:
+    body = _strip_html_comments(_skill_body(text))
+    if _TRACKER_ISSUE_VIEW_RE.search(body):
+        return True
+    for command in _shell_logical_lines(body):
+        if _TRACKER_ISSUE_API_RE.search(command) and not 
_TRACKER_ISSUE_API_MUTATION_RE.search(command):
+            return True
+    return False
+
+
+def _has_privacy_gate_command(text: str) -> bool:
+    body = _strip_html_comments(_skill_body(text))
+    return any(
+        _PRIVACY_LLM_GATE_PHRASE in block for block in 
_fenced_code_blocks_in_privacy_gate_sections(body)
+    )
+
+
+def validate_privacy_patterns(path: Path, text: str) -> Iterable[Violation]:
+    """Check Privacy-LLM gate-check convention from 
``write-skill/security-checklist.md``.
+
+    Pattern 6 applies to SKILL.md entry points whose mode processes external
+    content and whose workflow reads full issue bodies from the private
+    ``<tracker>`` repository. The gate is considered present only when
+    ``privacy-llm-check`` appears in a fenced command block; prose, HTML
+    comments, TODO notes, and anti-examples do not satisfy the check.
+    """
+    if path.name != "SKILL.md":
+        return
+
+    fm = parse_frontmatter(text) or {}
+    mode = fm.get("mode", "")
+    if mode not in _PRIVACY_EXTERNAL_CONTENT_MODES:
+        return
+
+    if _TRACKER_PLACEHOLDER not in text:
+        return
+    if not _has_tracker_body_read(text):
+        return
+
+    if not _has_privacy_gate_command(text):
+        yield Violation(
+            path,
+            None,
+            f"privacy-llm-gate: mode '{mode}' + '<tracker>' body read implies "
+            f"private-content access but the Privacy-LLM gate-check is missing 
— "
+            f"add 'uv run --project <framework>/tools/privacy-llm/checker "
+            f"privacy-llm-check' in the Prerequisites / Step 0 section "
+            f"(see write-skill/security-checklist.md § Pattern 6)",
+            category=PRIVACY_CATEGORY,
+        )
+
+
 # ---------------------------------------------------------------------------
 # Trigger-phrase non-regression
 # ---------------------------------------------------------------------------
@@ -1035,6 +1224,7 @@ def run_validation(root: Path | None = None) -> list[Violation]:
             violations.extend(validate_frontmatter(path, text))
             violations.extend(validate_injection_guard(path, text))
             violations.extend(validate_principle_compliance(path, text))
+            violations.extend(validate_privacy_patterns(path, text))
             violations.extend(validate_trigger_preservation(path, text, 
repo_root=repo_root))
 
         # All skill files get link + placeholder + security-pattern validation
@@ -1107,6 +1297,7 @@ _SOFT_RULE_PREFIXES: tuple[str, ...] = (
     "security-pattern-4",
     "security-pattern-9",
     "gh-list-no-limit",
+    "privacy-llm-gate",
 )
 
 
diff --git a/tools/skill-validator/tests/test_validator.py b/tools/skill-validator/tests/test_validator.py
index 3c2e098..210c9c6 100644
--- a/tools/skill-validator/tests/test_validator.py
+++ b/tools/skill-validator/tests/test_validator.py
@@ -24,6 +24,11 @@ from pathlib import Path
 import pytest
 
 from skill_validator import (
+    _MODE_STATUS_BY_NAME,
+    _MODE_TAXONOMY,
+    _OFF_MODES,
+    _PRIVACY_EXTERNAL_CONTENT_MODES,
+    ALLOWED_MODES,
     FORBIDDEN_PATTERNS,
     GH_LIST_CATEGORY,
     INJECTION_GUARD_CALLOUT_SENTINEL,
@@ -32,9 +37,11 @@ from skill_validator import (
     INJECTION_GUARD_TODO_SENTINEL,
     MAX_METADATA_CHARS,
     PRINCIPLE_CATEGORY,
+    PRIVACY_CATEGORY,
     SECURITY_PATTERN_CATEGORY,
     SOFT_CATEGORIES,
     TRIGGER_PRESERVATION_CATEGORY,
+    _read_mode_table,
     collect_doc_files,
     collect_files_to_check,
     collect_skill_dirs,
@@ -54,6 +61,7 @@ from skill_validator import (
     validate_links,
     validate_placeholders,
     validate_principle_compliance,
+    validate_privacy_patterns,
     validate_security_patterns,
     validate_trigger_preservation,
 )
@@ -178,6 +186,26 @@ class TestValidateFrontmatter:
         violations = list(validate_frontmatter(path, text))
         assert violations == []
 
+    def test_mode_taxonomy_matches_docs_modes(self) -> None:
+        docs_modes = Path(__file__).parents[3] / "docs" / "modes.md"
+        modes_table = (
+            docs_modes.read_text(encoding="utf-8")
+            .split("## Modes at a glance", 1)[1]
+            .split("## Triage", 1)[0]
+        )
+        modes: dict[str, str] = {}
+        for line in modes_table.splitlines():
+            if not line.startswith("| **"):
+                continue
+            cells = [cell.strip() for cell in line.strip("|").split("|")]
+            modes[cells[0].strip("*")] = cells[2]
+        assert _read_mode_table() == modes
+        assert _MODE_STATUS_BY_NAME == modes
+        assert _MODE_TAXONOMY == set(modes)
+        assert _OFF_MODES == {mode for mode, status in modes.items() if status 
== "off"}
+        assert ALLOWED_MODES == _MODE_TAXONOMY - _OFF_MODES
+        assert _PRIVACY_EXTERNAL_CONTENT_MODES == frozenset(ALLOWED_MODES - 
{"Pairing"})
+
     def test_metadata_under_limit(self, tmp_path: Path) -> None:
         path = tmp_path / "SKILL.md"
         desc = "a" * 800
@@ -695,6 +723,30 @@ class TestPrincipleCompliance:
 
 
 class TestTriggerPreservation:
+    @pytest.fixture(autouse=True)
+    def _isolate_git_env(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """Insulate temp-repo git calls from inherited git environment.
+
+        When the suite runs inside a pre-commit/prek hook, git env vars
+        (GIT_DIR, GIT_INDEX_FILE, GIT_OBJECT_DIRECTORY, ...) point at the host
+        repo. Without scrubbing them, ``git add``/``commit`` in the tmp_path
+        repo below — and the validator's ``git show`` — operate against the
+        host repo's index/objects instead of the isolated one, which fails
+        with "invalid object … Error building trees".
+        """
+        for var in (
+            "GIT_DIR",
+            "GIT_WORK_TREE",
+            "GIT_INDEX_FILE",
+            "GIT_OBJECT_DIRECTORY",
+            "GIT_ALTERNATE_OBJECT_DIRECTORIES",
+            "GIT_COMMON_DIR",
+            "GIT_NAMESPACE",
+            "GIT_PREFIX",
+            "GIT_CEILING_DIRECTORIES",
+        ):
+            monkeypatch.delenv(var, raising=False)
+
     def test_unavailable_base_ref_no_op(self, tmp_path: Path) -> None:
         """When git or the base ref isn't reachable, the check returns no 
violations."""
         skill = tmp_path / "SKILL.md"
@@ -1163,6 +1215,7 @@ class TestSoftCategories:
         assert INJECTION_GUARD_TODO_CATEGORY in SOFT_CATEGORIES
         assert SECURITY_PATTERN_CATEGORY in SOFT_CATEGORIES
         assert GH_LIST_CATEGORY in SOFT_CATEGORIES
+        assert PRIVACY_CATEGORY in SOFT_CATEGORIES
 
 
 # ---------------------------------------------------------------------------
@@ -1220,6 +1273,186 @@ class TestGhListLimit:
         assert not any("gh-list-no-limit" in v.message for v in violations)
 
 
+# ---------------------------------------------------------------------------
+# Pattern 6 — Privacy-LLM gate-check
+# ---------------------------------------------------------------------------
+
+_GATE = "privacy-llm-check"
+
+
+def _p6_skill(
+    mode: str = "Triage",
+    has_tracker: bool = True,
+    read_line: str = "gh issue view <N> --repo <tracker> --json body",
+    gate_text: str = "",
+) -> str:
+    parts = ["---", "name: test-skill", "description: bar", "license: 
Apache-2.0"]
+    if mode:
+        parts.append(f"mode: {mode}")
+    parts.append("---")
+    body_parts = ["# body"]
+    if has_tracker:
+        body_parts.append("Reads from the <tracker> repo.")
+    if read_line:
+        body_parts.append(f"Use `{read_line}`.")
+    if gate_text:
+        body_parts.append(gate_text)
+    parts.extend(body_parts)
+    return "\n".join(parts) + "\n"
+
+
+def _gate_block() -> str:
+    return "```bash\nuv run --project <framework>/tools/privacy-llm/checker 
\\\n  privacy-llm-check\n```\n"
+
+
+def _gate_section() -> str:
+    return f"## Step 0 — Pre-flight check\n\n{_gate_block()}"
+
+
+class TestPrivacyPatternP6:
+    def test_fires_triage_with_tracker_and_read_no_gate(self, tmp_path: Path) 
-> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_privacy_patterns(path, 
_p6_skill(mode="Triage")))
+        assert any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_fires_drafting_with_tracker_and_read_no_gate(self, tmp_path: 
Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_privacy_patterns(path, 
_p6_skill(mode="Drafting")))
+        assert any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_fires_mentoring_with_tracker_and_read_no_gate(self, tmp_path: 
Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_privacy_patterns(path, 
_p6_skill(mode="Mentoring")))
+        assert any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_violation_is_soft_category(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_privacy_patterns(path, _p6_skill()))
+        assert all(v.category == PRIVACY_CATEGORY for v in violations)
+
+    def test_silent_when_gate_present_in_fenced_command(self, tmp_path: Path) 
-> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_privacy_patterns(path, 
_p6_skill(gate_text=_gate_section())))
+        assert not any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_silent_when_gate_present_in_indented_fenced_command(self, 
tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        gate = (
+            "## Prerequisites\n\n"
+            "   ```bash\n"
+            "   uv run --project <framework>/tools/privacy-llm/checker \\\n"
+            "     privacy-llm-check\n"
+            "   ```"
+        )
+        violations = list(validate_privacy_patterns(path, 
_p6_skill(gate_text=gate)))
+        assert not any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_silent_when_gate_present_in_step_0_subsection(self, tmp_path: 
Path) -> None:
+        path = tmp_path / "SKILL.md"
+        gate = f"## Step 0 — Resolve inputs\n\n### Privacy-LLM 
gate\n\n{_gate_block()}"
+        violations = list(validate_privacy_patterns(path, 
_p6_skill(gate_text=gate)))
+        assert not any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_gate_in_html_comment_does_not_satisfy(self, tmp_path: Path) -> 
None:
+        path = tmp_path / "SKILL.md"
+        text = _p6_skill(gate_text=f"<!-- TODO: wire up {_GATE} -->")
+        violations = list(validate_privacy_patterns(path, text))
+        assert any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_gate_in_prose_does_not_satisfy(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        text = _p6_skill(gate_text=f"Remember to run {_GATE} later.")
+        violations = list(validate_privacy_patterns(path, text))
+        assert any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_gate_in_inline_code_does_not_satisfy(self, tmp_path: Path) -> 
None:
+        path = tmp_path / "SKILL.md"
+        text = _p6_skill(gate_text=f"TODO: `{_GATE}`")
+        violations = list(validate_privacy_patterns(path, text))
+        assert any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_gate_in_fenced_bad_example_does_not_satisfy(self, tmp_path: Path) 
-> None:
+        path = tmp_path / "SKILL.md"
+        gate = f"## Don't do this\n\n{_gate_block()}"
+        violations = list(validate_privacy_patterns(path, 
_p6_skill(gate_text=gate)))
+        assert any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_gate_in_later_fenced_section_does_not_satisfy(self, tmp_path: 
Path) -> None:
+        path = tmp_path / "SKILL.md"
+        gate = f"## History\n\n{_gate_block()}"
+        violations = list(validate_privacy_patterns(path, 
_p6_skill(gate_text=gate)))
+        assert any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_gate_after_step_0_section_does_not_satisfy(self, tmp_path: Path) 
-> None:
+        path = tmp_path / "SKILL.md"
+        gate = f"## Step 0 — Pre-flight check\n\nNo gate here.\n\n## 
History\n\n{_gate_block()}"
+        violations = list(validate_privacy_patterns(path, 
_p6_skill(gate_text=gate)))
+        assert any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_gate_in_appendix_step_0_snippet_does_not_satisfy(self, tmp_path: 
Path) -> None:
+        path = tmp_path / "SKILL.md"
+        gate = f"## Appendix: Step 0 from an older version\n\n{_gate_block()}"
+        violations = list(validate_privacy_patterns(path, 
_p6_skill(gate_text=gate)))
+        assert any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_gate_in_step_0_bad_example_subsection_does_not_satisfy(self, 
tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        gate = f"## Step 0 — Pre-flight check\n\n### Bad 
example\n\n{_gate_block()}"
+        violations = list(validate_privacy_patterns(path, 
_p6_skill(gate_text=gate)))
+        assert any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_rest_issue_get_counts_as_tracker_body_read(self, tmp_path: Path) 
-> None:
+        path = tmp_path / "SKILL.md"
+        text = _p6_skill(read_line="gh api repos/<tracker>/issues/<N>")
+        violations = list(validate_privacy_patterns(path, text))
+        assert any("privacy-llm-gate" in v.message for v in violations)
+
+    def 
test_rest_issue_get_with_leading_slash_counts_as_tracker_body_read(self, 
tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        text = _p6_skill(read_line="gh api /repos/<tracker>/issues/<N>")
+        violations = list(validate_privacy_patterns(path, text))
+        assert any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_rest_issue_patch_is_exempt(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        text = _p6_skill(read_line="gh api repos/<tracker>/issues/<N> -X PATCH 
-f title=x")
+        violations = list(validate_privacy_patterns(path, text))
+        assert not any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_multiline_rest_issue_patch_is_exempt(self, tmp_path: Path) -> 
None:
+        path = tmp_path / "SKILL.md"
+        text = _p6_skill(
+            read_line="gh api repos/<tracker>/issues/<N> \\\n  -X PATCH \\\n  
-f title=x",
+        )
+        violations = list(validate_privacy_patterns(path, text))
+        assert not any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_silent_when_no_tracker_reference(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_privacy_patterns(path, 
_p6_skill(has_tracker=False, read_line="")))
+        assert not any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_silent_when_tracker_but_no_issue_body_read(self, tmp_path: Path) 
-> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_privacy_patterns(path, 
_p6_skill(read_line="")))
+        assert not any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_silent_when_no_mode(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_privacy_patterns(path, _p6_skill(mode="")))
+        assert not any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_silent_for_pairing_mode(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_privacy_patterns(path, 
_p6_skill(mode="Pairing")))
+        assert not any("privacy-llm-gate" in v.message for v in violations)
+
+    def test_silent_on_sub_doc(self, tmp_path: Path) -> None:
+        path = tmp_path / "step-0-preflight.md"
+        violations = list(validate_privacy_patterns(path, _p6_skill()))
+        assert not any("privacy-llm-gate" in v.message for v in violations)
+
+
 # ---------------------------------------------------------------------------
 # is_placeholder_url
 # ---------------------------------------------------------------------------

(airflow-steward) branch main updated: add Privacy-LLM gate-check validator (#215)

Reply via email to