This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-steward.git


The following commit(s) were added to refs/heads/main by this push:
     new b9258c3  fix(skill-validator): match GitHub anchor algorithm for repeated whitespace (#65)
b9258c3 is described below

commit b9258c3ea24664fb4ae2d392c1fca6e0ae254347
Author: André Ahlert <[email protected]>
AuthorDate: Wed May 6 12:07:24 2026 -0300

    fix(skill-validator): match GitHub anchor algorithm for repeated whitespace (#65)
    
    `slugify` was using `re.sub(r"[\s]+", "-", text)` which collapses runs of
    whitespace into a single dash. GitHub's anchor renderer (and doctoc)
    replace each whitespace character one-for-one, so headings whose text
    contains an em-dash (which is stripped, leaving two adjacent spaces)
    yield double-dash anchors. Concretely, "Mode B — conversational
    mentoring" must slugify to `mode-b--conversational-mentoring`, matching
    the doctoc-generated TOC anchor in `docs/modes.md`.
    
    The validator was therefore producing single-dash slugs and reporting ~38
    false-positive "anchor not found" violations against the real repo.
    
    Drop the `+` quantifier so each whitespace becomes its own dash, and
    update the existing `test_multiple_spaces` expectation to match the
    actual GitHub algorithm. Add `test_em_dash_in_heading` to pin the
    canonical case.
---
 tools/skill-validator/src/skill_validator/__init__.py |  2 +-
 tools/skill-validator/tests/test_validator.py         | 12 +++++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/tools/skill-validator/src/skill_validator/__init__.py b/tools/skill-validator/src/skill_validator/__init__.py
index fe93b32..cb4b768 100644
--- a/tools/skill-validator/src/skill_validator/__init__.py
+++ b/tools/skill-validator/src/skill_validator/__init__.py
@@ -111,7 +111,7 @@ LINK_PATTERN = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
 
 # Anchor slug generation — mirrors doctoc/GitHub logic loosely.
 ANCHOR_PATTERN = re.compile(r"[^\w\s-]+")
-ANCHOR_SPACE_PATTERN = re.compile(r"[\s]+")
+ANCHOR_SPACE_PATTERN = re.compile(r"\s")
 
 
 # ---------------------------------------------------------------------------
diff --git a/tools/skill-validator/tests/test_validator.py b/tools/skill-validator/tests/test_validator.py
index b346903..3b991f8 100644
--- a/tools/skill-validator/tests/test_validator.py
+++ b/tools/skill-validator/tests/test_validator.py
@@ -159,7 +159,17 @@ class TestSlugify:
         assert slugify("What's new?") == "whats-new"
 
     def test_multiple_spaces(self) -> None:
-        assert slugify("A  B   C") == "a-b-c"
+        # GitHub's anchor algorithm replaces each whitespace character with
+        # a dash one-for-one rather than collapsing runs. Doctoc and the
+        # GitHub renderer agree on this; the canonical case is em-dash
+        # headings, which strip to "" and leave two adjacent spaces.
+        assert slugify("A  B   C") == "a--b---c"
+
+    def test_em_dash_in_heading(self) -> None:
+        assert (
+            slugify("Mode B — conversational mentoring")
+            == "mode-b--conversational-mentoring"
+        )
 
 
 class TestExtractHeadings:

Reply via email to