This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-steward.git
The following commit(s) were added to refs/heads/main by this push:
new f68064f feat(skill-validator): flag -f field='value' for susceptible
fields (Pattern 2) (#218)
f68064f is described below
commit f68064f0707c0de44189e0dc3c42d412166d3a14
Author: Justin Mclean <[email protected]>
AuthorDate: Mon May 25 17:35:02 2026 +1000
feat(skill-validator): flag -f field='value' for susceptible fields
(Pattern 2) (#218)
---
.claude/skills/security-issue-sync/SKILL.md | 29 +++++-
.../src/skill_validator/__init__.py | 66 ++++++++++++
tools/skill-validator/tests/test_validator.py | 115 +++++++++++++++++++++
3 files changed, 206 insertions(+), 4 deletions(-)
diff --git a/.claude/skills/security-issue-sync/SKILL.md
b/.claude/skills/security-issue-sync/SKILL.md
index ea4490f..6066ef1 100644
--- a/.claude/skills/security-issue-sync/SKILL.md
+++ b/.claude/skills/security-issue-sync/SKILL.md
@@ -1023,19 +1023,40 @@ will change and *why*. Group them by category:
a date. For a provider-wave milestone the description should name
the release manager so the advisory owner is visible at a glance:
+ **Use the Write tool** (not Bash) to write each field value verbatim
+ to a temp file, then pass via `-F`:
+
+ *Write tool call:* `file_path: /tmp/ms-title-<tracker>.txt`,
+ `content: <Milestone>`
+
+ *Write tool call:* `file_path: /tmp/ms-desc-<tracker>.txt`,
+ `content: <optional>`
+
```bash
# Core or chart (due_on mirrored from upstream when available):
gh api repos/<tracker>/milestones \
- -f title='<Milestone>' -f state=open \
- -f description='<optional>' \
+ -F title=@/tmp/ms-title-<tracker>.txt \
+ -f state=open \
+ -F description=@/tmp/ms-desc-<tracker>.txt \
-f due_on='<ISO8601 from upstream, omit if upstream has none>'
+ ```
+ For provider waves, update the Write tool calls with:
+
+ *Write tool call:* `file_path: /tmp/ms-title-<tracker>.txt`,
+ `content: Providers YYYY-MM-DD`
+
+ *Write tool call:* `file_path: /tmp/ms-desc-<tracker>.txt`,
+ `content: Providers release cut on YYYY-MM-DD, RM: <Name>`
+
+ ```bash
# Provider wave (cut date + RM from the Release Plan wiki /
# dev@ [VOTE] thread; upstream does not milestone providers
# waves so due_on typically comes from the wiki):
gh api repos/<tracker>/milestones \
- -f title='Providers YYYY-MM-DD' -f state=open \
- -f description='Providers release cut on YYYY-MM-DD, RM: <Name>'
+ -F title=@/tmp/ms-title-<tracker>.txt \
+ -f state=open \
+ -F description=@/tmp/ms-desc-<tracker>.txt
```
After the create call, assign the milestone to the issue via
diff --git a/tools/skill-validator/src/skill_validator/__init__.py
b/tools/skill-validator/src/skill_validator/__init__.py
index ee99270..8b33723 100644
--- a/tools/skill-validator/src/skill_validator/__init__.py
+++ b/tools/skill-validator/src/skill_validator/__init__.py
@@ -193,6 +193,7 @@ INJECTION_GUARD_TODO_CATEGORY = "injection_guard_todo"
GH_LIST_CATEGORY = "gh_list_no_limit"
SECURITY_PATTERN_CATEGORY = "security_pattern"
PRIVACY_CATEGORY = "privacy"
+LOWERCASE_F_FIELD_CATEGORY = "lowercase_f_field"
SOFT_CATEGORIES: frozenset[str] = frozenset(
{
PRINCIPLE_CATEGORY,
@@ -201,6 +202,7 @@ SOFT_CATEGORIES: frozenset[str] = frozenset(
SECURITY_PATTERN_CATEGORY,
GH_LIST_CATEGORY,
PRIVACY_CATEGORY,
+ LOWERCASE_F_FIELD_CATEGORY,
}
)
@@ -1146,6 +1148,68 @@ def collect_files_to_check(root: Path | None = None) ->
list[Path]:
return list(base.rglob("*.md"))
+# ---------------------------------------------------------------------------
+# Lowercase -f field check (Pattern 2)
+# ---------------------------------------------------------------------------
+
+# Field names that commonly carry attacker-controlled content and must use
+# -F field=@file rather than -f field='value'. Fields that are always
+# framework-internal static values (query strings, state toggles, OIDs,
+# sort keys, etc.) are excluded — they never originate outside the framework.
+_LOWERCASE_F_SUSCEPTIBLE_FIELDS: frozenset[str] = frozenset(
+ {"title", "body", "description", "name", "label", "milestone"},
+)
+
+# Matches -f <susceptible-field>='...' or -f <susceptible-field>="..."
+# The field name must be one of the susceptible set; the value must start
+# with a quote (single or double) immediately after the equals sign.
+_LOWERCASE_F_FIELD_RE = re.compile(
+ r"-f\s+(" + "|".join(sorted(_LOWERCASE_F_SUSCEPTIBLE_FIELDS)) + r")=['\"]",
+)
+
+# Files that intentionally document the bad pattern and must not be flagged.
+_LOWERCASE_F_SKIP_SUFFIXES: tuple[str, ...] =
("write-skill/security-checklist.md",)
+
+
+def validate_lowercase_f_field(path: Path, text: str) -> Iterable[Violation]:
+ """Flag ``-f field='value'`` / ``-f field="value"`` for susceptible fields.
+
+ Passing user-supplied or attacker-controlled content (titles, bodies,
+ descriptions, names) as inline ``-f field='...'`` arguments is a
+ shell-injection vector — the value goes through shell quoting and can
+ break out. The safe form is ``-F field=@file``, which reads the value
+ verbatim from a temp file written by the Write tool, bypassing the shell
+ tokeniser entirely.
+
+ Only flags fields in ``_LOWERCASE_F_SUSCEPTIBLE_FIELDS``; safe static
+ fields (``query``, ``state``, ``oid``, ``type``, ``sort``, …) are
+ ignored. Inline backtick prose mentions are also skipped.
+
+ All violations are **SOFT** — advisory only.
+ """
+ if any(str(path).endswith(suffix) for suffix in
_LOWERCASE_F_SKIP_SUFFIXES):
+ return
+ # Only inspect content inside fenced code blocks (real commands).
+ # Prose mentions outside fenced blocks (e.g. in backtick spans or plain
+ # text) are skipped by this gate — no separate inline-span check needed.
+ fenced_spans = [m.span() for m in _FENCED_CODE_RE.finditer(text)]
+ for m in _LOWERCASE_F_FIELD_RE.finditer(text):
+ pos = m.start()
+ if not any(fs <= pos < fe for fs, fe in fenced_spans):
+ continue
+ field = m.group(1)
+ line_no = text[:pos].count("\n") + 1
+ yield Violation(
+ path,
+ line_no,
+ f"lowercase-f-field: '-f {field}=<quoted>' passes a susceptible
field "
+ f"as an inline shell argument — use '-F {field}=@<tmpfile>'
written "
+ f"by the Write tool instead to avoid shell-injection risk "
+ f"(see write-skill/security-checklist.md § Pattern 2)",
+ category=LOWERCASE_F_FIELD_CATEGORY,
+ )
+
+
def collect_skill_dirs(root: Path | None = None) -> set[Path]:
"""Return the set of skill directories (immediate children of
.claude/skills)."""
base = (root or find_repo_root()) / SKILLS_DIR
@@ -1232,6 +1296,7 @@ def run_validation(root: Path | None = None) ->
list[Violation]:
violations.extend(validate_placeholders(path, text))
violations.extend(validate_security_patterns(path, text))
violations.extend(validate_gh_list_limit(path, text))
+ violations.extend(validate_lowercase_f_field(path, text))
return violations
@@ -1290,6 +1355,7 @@ _SOFT_RULE_PREFIXES: tuple[str, ...] = (
"chain-handoff",
"criteria-source",
"distinct-from",
+ "lowercase-f-field",
"parenthetical rationale",
"trigger phrase",
"injection-guard TODO",
diff --git a/tools/skill-validator/tests/test_validator.py
b/tools/skill-validator/tests/test_validator.py
index 210c9c6..0399230 100644
--- a/tools/skill-validator/tests/test_validator.py
+++ b/tools/skill-validator/tests/test_validator.py
@@ -35,6 +35,7 @@ from skill_validator import (
INJECTION_GUARD_CATEGORY,
INJECTION_GUARD_TODO_CATEGORY,
INJECTION_GUARD_TODO_SENTINEL,
+ LOWERCASE_F_FIELD_CATEGORY,
MAX_METADATA_CHARS,
PRINCIPLE_CATEGORY,
PRIVACY_CATEGORY,
@@ -59,6 +60,7 @@ from skill_validator import (
validate_gh_list_limit,
validate_injection_guard,
validate_links,
+ validate_lowercase_f_field,
validate_placeholders,
validate_principle_compliance,
validate_privacy_patterns,
@@ -1203,6 +1205,118 @@ class TestSecurityPatterns:
assert all(v.category == SECURITY_PATTERN_CATEGORY for v in violations)
+# ---------------------------------------------------------------------------
+# Lowercase -f field check (Pattern 2)
+# ---------------------------------------------------------------------------
+
+
+def _fenced_skill_lf(cmd: str) -> str:
+ """Wrap *cmd* in a minimal SKILL.md with a fenced bash block."""
+ return f"---\nname: test\ndescription: test\nlicense:
Apache-2.0\n---\n\n```bash\n{cmd}\n```\n"
+
+
+class TestLowercaseFField:
+ def test_title_single_quote_flagged(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill_lf("gh api repos/<tracker>/milestones -f
title='v1.0'")
+ violations = list(validate_lowercase_f_field(path, text))
+ assert len(violations) == 1
+ assert violations[0].category == LOWERCASE_F_FIELD_CATEGORY
+ assert "lowercase-f-field" in violations[0].message
+ assert "title" in violations[0].message
+
+ def test_title_double_quote_flagged(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill_lf('gh api repos/<tracker>/milestones -f
title="v1.0"')
+ violations = list(validate_lowercase_f_field(path, text))
+ assert len(violations) == 1
+ assert violations[0].category == LOWERCASE_F_FIELD_CATEGORY
+
+ def test_description_flagged(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill_lf("gh api repos/<tracker>/milestones -f
description='some text'")
+ violations = list(validate_lowercase_f_field(path, text))
+ assert len(violations) == 1
+ assert "description" in violations[0].message
+
+ def test_name_flagged(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill_lf("gh api repos/<tracker>/labels -f name='bug'")
+ violations = list(validate_lowercase_f_field(path, text))
+ assert len(violations) == 1
+
+ def test_body_flagged(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill_lf("gh api repos/<tracker>/issues -f body='some
text'")
+ violations = list(validate_lowercase_f_field(path, text))
+ assert len(violations) == 1
+
+ def test_query_not_flagged(self, tmp_path: Path) -> None:
+ """GraphQL query strings are always framework-hardcoded — not
susceptible."""
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill_lf("gh api graphql -f query='{ viewer { login }
}'")
+ violations = list(validate_lowercase_f_field(path, text))
+ assert violations == []
+
+ def test_state_not_flagged(self, tmp_path: Path) -> None:
+ """Static state values (open/closed) are always safe."""
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill_lf("gh api repos/<tracker>/milestones -f
state=open")
+ violations = list(validate_lowercase_f_field(path, text))
+ assert violations == []
+
+ def test_oid_not_flagged(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill_lf("gh api graphql -f oid=abc123def456")
+ violations = list(validate_lowercase_f_field(path, text))
+ assert violations == []
+
+ def test_uppercase_F_not_flagged(self, tmp_path: Path) -> None:
+ """Uppercase -F is the correct form — must never be flagged."""
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill_lf("gh api repos/<tracker>/issues -F
title=@/tmp/title.txt")
+ violations = list(validate_lowercase_f_field(path, text))
+ assert violations == []
+
+ def test_prose_mention_not_flagged(self, tmp_path: Path) -> None:
+ """Inline backtick prose like ``-f title='...'`` must not fire."""
+ path = tmp_path / "SKILL.md"
+ text = (
+ "---\nname: test\ndescription: test\nlicense: Apache-2.0\n---\n\n"
+ "Avoid using `-f title='value'` — use `-F title=@file` instead.\n"
+ )
+ violations = list(validate_lowercase_f_field(path, text))
+ assert violations == []
+
+ def test_outside_fenced_block_not_flagged(self, tmp_path: Path) -> None:
+ """Bare prose outside a fenced block must not fire."""
+ path = tmp_path / "SKILL.md"
+ text = (
+ "---\nname: test\ndescription: test\nlicense: Apache-2.0\n---\n\n"
+ "Run: gh api milestones -f title='v1'\n"
+ )
+ violations = list(validate_lowercase_f_field(path, text))
+ assert violations == []
+
+ def test_checklist_file_skipped(self, tmp_path: Path) -> None:
+ """The security checklist documents the bad pattern — must not
self-flag."""
+ path = tmp_path / "write-skill" / "security-checklist.md"
+ path.parent.mkdir()
+ text = _fenced_skill_lf("gh api repos/<tracker>/milestones -f
title='v1.0'")
+ violations = list(validate_lowercase_f_field(path, text))
+ assert violations == []
+
+ def test_violation_line_number_correct(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill_lf("gh api repos/<tracker>/milestones -f
title='v1.0'")
+ # Layout: 1:--- 2:name 3:description 4:license 5:--- 6:blank 7:```bash
8:command
+ violations = list(validate_lowercase_f_field(path, text))
+ assert violations[0].line == 8
+
+ def test_lowercase_f_field_in_soft_categories(self) -> None:
+ assert LOWERCASE_F_FIELD_CATEGORY in SOFT_CATEGORIES
+
+
# ---------------------------------------------------------------------------
# SOFT category exposure
# ---------------------------------------------------------------------------
@@ -1216,6 +1330,7 @@ class TestSoftCategories:
assert SECURITY_PATTERN_CATEGORY in SOFT_CATEGORIES
assert GH_LIST_CATEGORY in SOFT_CATEGORIES
assert PRIVACY_CATEGORY in SOFT_CATEGORIES
+ assert LOWERCASE_F_FIELD_CATEGORY in SOFT_CATEGORIES
# ---------------------------------------------------------------------------