https://github.com/python/cpython/commit/0ce9fb7e3b192aefdb55e86f3e4c0e504445812d
commit: 0ce9fb7e3b192aefdb55e86f3e4c0e504445812d
branch: main
author: Pablo Galindo Salgado <[email protected]>
committer: pablogsal <[email protected]>
date: 2025-09-18T02:17:04+01:00
summary:
gh-138970: Add general metadata system to the peg generator (#138971)
files:
M Lib/test/test_peg_generator/test_pegen.py
M Tools/peg_generator/pegen/c_generator.py
M Tools/peg_generator/pegen/grammar.py
M Tools/peg_generator/pegen/grammar_parser.py
M Tools/peg_generator/pegen/metagrammar.gram
diff --git a/Lib/test/test_peg_generator/test_pegen.py b/Lib/test/test_peg_generator/test_pegen.py
index d912c55812397d..0387b9395611b0 100644
--- a/Lib/test/test_peg_generator/test_pegen.py
+++ b/Lib/test/test_peg_generator/test_pegen.py
@@ -1106,3 +1106,49 @@ def test_deep_nested_rule(self) -> None:
)
self.assertEqual(output, expected_output)
+
+ def test_rule_flags(self) -> None:
+ """Test the new rule flags syntax that accepts arbitrary lists of flags."""
+ # Test grammar with various flag combinations
+ grammar_source = """
+ start: simple_rule
+
+ simple_rule (memo):
+ | "hello"
+
+ multi_flag_rule (memo, custom, test):
+ | "world"
+
+ single_custom_flag (custom):
+ | "test"
+
+ no_flags_rule:
+ | "plain"
+ """
+
+ grammar: Grammar = parse_string(grammar_source, GrammarParser)
+ rules = grammar.rules
+
+ # Test memo-only rule
+ simple_rule = rules['simple_rule']
+ self.assertTrue(simple_rule.memo, "simple_rule should have memo=True")
+ self.assertEqual(simple_rule.flags, frozenset(['memo']),
+ f"simple_rule flags should be {'memo'}, got {simple_rule.flags}")
+
+ # Test multi-flag rule
+ multi_flag_rule = rules['multi_flag_rule']
+ self.assertTrue(multi_flag_rule.memo, "multi_flag_rule should have memo=True")
+ self.assertEqual(multi_flag_rule.flags, frozenset({'memo', 'custom', 'test'}),
+ f"multi_flag_rule flags should contain memo, custom, test, got {multi_flag_rule.flags}")
+
+ # Test single custom flag rule
+ single_custom_rule = rules['single_custom_flag']
+ self.assertFalse(single_custom_rule.memo, "single_custom_flag should have memo=False")
+ self.assertEqual(single_custom_rule.flags, frozenset(['custom']),
+ f"single_custom_flag flags should be {'custom'}, got {single_custom_rule.flags}")
+
+ # Test no flags rule
+ no_flags_rule = rules['no_flags_rule']
+ self.assertFalse(no_flags_rule.memo, "no_flags_rule should have memo=False")
+ self.assertEqual(no_flags_rule.flags, [],
+ f"no_flags_rule flags should be the empty set, got {no_flags_rule.flags}")
diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py
index fa75174ea0d59d..ffa73a64f21cfe 100644
--- a/Tools/peg_generator/pegen/c_generator.py
+++ b/Tools/peg_generator/pegen/c_generator.py
@@ -595,7 +595,7 @@ def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
self.print(f"{node.name}_raw(Parser *p)")
def _should_memoize(self, node: Rule) -> bool:
- return node.memo and not node.left_recursive
+ return "memo" in node.flags and not node.left_recursive
def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> None:
memoize = self._should_memoize(node)
diff --git a/Tools/peg_generator/pegen/grammar.py b/Tools/peg_generator/pegen/grammar.py
index cca8584a632071..d3c2eca6615a9f 100644
--- a/Tools/peg_generator/pegen/grammar.py
+++ b/Tools/peg_generator/pegen/grammar.py
@@ -58,11 +58,11 @@ def __iter__(self) -> Iterator[Rule]:
class Rule:
- def __init__(self, name: str, type: str | None, rhs: Rhs, memo: object | None = None):
+ def __init__(self, name: str, type: str | None, rhs: Rhs, flags: frozenset[str] | None = None):
self.name = name
self.type = type
self.rhs = rhs
- self.memo = bool(memo)
+ self.flags = flags or frozenset()
self.left_recursive = False
self.leader = False
@@ -135,7 +135,6 @@ def __repr__(self) -> str:
class Rhs:
def __init__(self, alts: list[Alt]):
self.alts = alts
- self.memo: tuple[str | None, str] | None = None
def __str__(self) -> str:
return " | ".join(str(alt) for alt in self.alts)
@@ -263,7 +262,6 @@ class Repeat:
def __init__(self, node: Plain):
self.node = node
- self.memo: tuple[str | None, str] | None = None
def __iter__(self) -> Iterator[Plain]:
yield self.node
diff --git a/Tools/peg_generator/pegen/grammar_parser.py b/Tools/peg_generator/pegen/grammar_parser.py
index 2e3a607f7209b0..4fa2739270773f 100644
--- a/Tools/peg_generator/pegen/grammar_parser.py
+++ b/Tools/peg_generator/pegen/grammar_parser.py
@@ -147,12 +147,12 @@ def rules(self) -> Optional[RuleList]:
@memoize
def rule(self) -> Optional[Rule]:
- # rule: rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" alts NEWLINE
+ # rule: rulename flags? ":" alts NEWLINE INDENT more_alts DEDENT | rulename flags? ":" NEWLINE INDENT more_alts DEDENT | rulename flags? ":" alts NEWLINE
mark = self._mark()
if (
(rulename := self.rulename())
and
- (opt := self.memoflag(),)
+ (flags := self.flags(),)
and
(literal := self.expect(":"))
and
@@ -166,12 +166,12 @@ def rule(self) -> Optional[Rule]:
and
(_dedent := self.expect('DEDENT'))
):
- return Rule ( rulename [0] , rulename [1] , Rhs ( alts . alts + more_alts . alts ) , memo = opt )
+ return Rule ( rulename [0] , rulename [1] , Rhs ( alts . alts + more_alts . alts ) , flags = flags )
self._reset(mark)
if (
(rulename := self.rulename())
and
- (opt := self.memoflag(),)
+ (flags := self.flags(),)
and
(literal := self.expect(":"))
and
@@ -183,12 +183,12 @@ def rule(self) -> Optional[Rule]:
and
(_dedent := self.expect('DEDENT'))
):
- return Rule ( rulename [0] , rulename [1] , more_alts , memo = opt )
+ return Rule ( rulename [0] , rulename [1] , more_alts , flags = flags )
self._reset(mark)
if (
(rulename := self.rulename())
and
- (opt := self.memoflag(),)
+ (flags := self.flags(),)
and
(literal := self.expect(":"))
and
@@ -196,7 +196,7 @@ def rule(self) -> Optional[Rule]:
and
(_newline := self.expect('NEWLINE'))
):
- return Rule ( rulename [0] , rulename [1] , alts , memo = opt )
+ return Rule ( rulename [0] , rulename [1] , alts , flags = flags )
self._reset(mark)
return None
@@ -219,17 +219,28 @@ def rulename(self) -> Optional[RuleName]:
return None
@memoize
- def memoflag(self) -> Optional[str]:
- # memoflag: '(' "memo" ')'
+ def flags(self) -> Optional[frozenset [str]]:
+ # flags: '(' ','.flag+ ')'
mark = self._mark()
if (
(literal := self.expect('('))
and
- (literal_1 := self.expect("memo"))
+ (a := self._gather_2())
and
- (literal_2 := self.expect(')'))
+ (literal_1 := self.expect(')'))
+ ):
+ return frozenset ( a )
+ self._reset(mark)
+ return None
+
+ @memoize
+ def flag(self) -> Optional[str]:
+ # flag: NAME
+ mark = self._mark()
+ if (
+ (name := self.name())
):
- return "memo"
+ return name . string
self._reset(mark)
return None
@@ -661,8 +672,38 @@ def target_atom(self) -> Optional[str]:
self._reset(mark)
return None
+ @memoize
+ def _loop0_1(self) -> Optional[Any]:
+ # _loop0_1: ',' flag
+ mark = self._mark()
+ children = []
+ while (
+ (literal := self.expect(','))
+ and
+ (elem := self.flag())
+ ):
+ children.append(elem)
+ mark = self._mark()
+ self._reset(mark)
+ return children
+
+ @memoize
+ def _gather_2(self) -> Optional[Any]:
+ # _gather_2: flag _loop0_1
+ mark = self._mark()
+ if (
+ (elem := self.flag())
+ is not None
+ and
+ (seq := self._loop0_1())
+ is not None
+ ):
+ return [elem] + seq
+ self._reset(mark)
+ return None
+
KEYWORDS = ()
- SOFT_KEYWORDS = ('memo',)
+ SOFT_KEYWORDS = ()
if __name__ == '__main__':
diff --git a/Tools/peg_generator/pegen/metagrammar.gram b/Tools/peg_generator/pegen/metagrammar.gram
index f484c4781823bc..cae91ab9c4165b 100644
--- a/Tools/peg_generator/pegen/metagrammar.gram
+++ b/Tools/peg_generator/pegen/metagrammar.gram
@@ -50,19 +50,21 @@ rules[RuleList]:
| rule { [rule] }
rule[Rule]:
- | rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT {
- Rule(rulename[0], rulename[1], Rhs(alts.alts + more_alts.alts), memo=opt) }
- | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT {
- Rule(rulename[0], rulename[1], more_alts, memo=opt) }
- | rulename memoflag? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, memo=opt) }
+ | rulename flags=flags? ":" alts NEWLINE INDENT more_alts DEDENT {
+ Rule(rulename[0], rulename[1], Rhs(alts.alts + more_alts.alts), flags=flags) }
+ | rulename flags=flags? ":" NEWLINE INDENT more_alts DEDENT {
+ Rule(rulename[0], rulename[1], more_alts, flags=flags) }
+ | rulename flags=flags? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, flags=flags) }
rulename[RuleName]:
| NAME annotation { (name.string, annotation) }
| NAME { (name.string, None) }
-# In the future this may return something more complicated
-memoflag[str]:
- | '(' "memo" ')' { "memo" }
+flags[frozenset[str]]:
+ | '(' a=','.flag+ ')' { frozenset(a) }
+
+flag[str]:
+ | NAME { name.string }
alts[Rhs]:
| alt "|" alts { Rhs([alt] + alts.alts)}
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]