justinmclean commented on code in PR #215:
URL: https://github.com/apache/airflow-steward/pull/215#discussion_r3295828041


##########
tools/skill-validator/src/skill_validator/__init__.py:
##########
@@ -638,6 +708,118 @@ def validate_principle_compliance(path: Path, text: str) 
-> Iterable[Violation]:
         )
 
 
+# ---------------------------------------------------------------------------
+# Privacy-LLM gate-check (write-skill/security-checklist.md § Pattern 6)
+# ---------------------------------------------------------------------------
+
+
+def _heading_text(raw: str) -> str:
+    text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", raw.strip())
+    text = text.strip("#").strip()
+    return text
+
+
+def _fenced_code_blocks(text: str) -> list[str]:
+    return [m.group(0) for m in _FENCED_CODE_RE.finditer(text)]
+
+
+def _fenced_code_blocks_in_privacy_gate_sections(text: str) -> list[str]:
+    """Return fenced code blocks inside Prerequisites / Preflight / Step 0 
sections."""
+    heading_re = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
+    headings = list(heading_re.finditer(text))
+    heading_index = 0
+    stack: list[tuple[int, str]] = []
+    blocks: list[str] = []
+
+    for block in _FENCED_CODE_RE.finditer(text):
+        while heading_index < len(headings) and 
headings[heading_index].start() < block.start():
+            heading = headings[heading_index]
+            level = len(heading.group(1))
+            title = _heading_text(heading.group(2))
+            stack = [(old_level, old_title) for old_level, old_title in stack 
if old_level < level]
+            stack.append((level, title))
+            heading_index += 1
+
+        titles = [title for _, title in stack]
+        if any(_ANTI_EXAMPLE_SECTION_RE.search(title) for title in titles):
+            continue
+        if any(_PRIVACY_GATE_SECTION_RE.search(title) for title in titles):
+            blocks.append(block.group(0))
+
+    return blocks
+
+
+def _shell_logical_lines(text: str) -> list[str]:
+    lines: list[str] = []
+    current: list[str] = []
+    for line in text.splitlines():
+        stripped = line.rstrip()
+        if stripped.endswith("\\"):
+            current.append(stripped[:-1].strip())
+            continue
+        if current:
+            current.append(stripped.strip())
+            lines.append(" ".join(part for part in current if part))
+            current = []
+        else:
+            lines.append(line)
+    if current:
+        lines.append(" ".join(part for part in current if part))
+    return lines
+
+
+def _has_tracker_body_read(text: str) -> bool:
+    body = _strip_html_comments(_skill_body(text))
+    if _TRACKER_ISSUE_VIEW_RE.search(body):
+        return True
+    for command in _shell_logical_lines(body):
+        if _TRACKER_ISSUE_API_RE.search(command) and not 
_TRACKER_ISSUE_API_MUTATION_RE.search(command):
+            return True
+    return False

Review Comment:
   This is a repeat of the last comment.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to