https://github.com/python/cpython/commit/08c78e02fab4a1c9c075637422d621f9c740959a
commit: 08c78e02fab4a1c9c075637422d621f9c740959a
branch: main
author: Eric V. Smith <[email protected]>
committer: ericvsmith <[email protected]>
date: 2025-05-26T13:49:39-04:00
summary:
gh-134675: Add t-string prefixes to tokenizer module, lexical analysis doc, and
add a test to make sure we catch this error in the future. (#134734)
* Add t-string prefixes to _all_string_prefixes, and add a test to make sure we
catch this error in the future.
* Update lexical analysis docs for t-string prefixes.
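
A quick way to see the effect is to probe the module-level tokenize.StringPrefix
pattern directly (a rough sketch, assuming a CPython build that includes this
commit):

    import re
    import tokenize

    # StringPrefix is a parenthesized regex alternation of every legal
    # string prefix; with this fix it matches the t-string spellings too.
    print(bool(re.fullmatch(tokenize.StringPrefix, 'rt')))  # True (False before this fix)
    print(bool(re.fullmatch(tokenize.StringPrefix, 'tr')))  # True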
files:
M Doc/reference/lexical_analysis.rst
M Lib/test/test_tokenize.py
M Lib/tokenize.py
diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst
index 6c4a4ea81afe29..b22eb4db7945d1 100644
--- a/Doc/reference/lexical_analysis.rst
+++ b/Doc/reference/lexical_analysis.rst
@@ -489,8 +489,9 @@ String literals are described by the following lexical definitions:
 
 .. productionlist:: python-grammar
    stringliteral: [`stringprefix`](`shortstring` | `longstring`)
-   stringprefix: "r" | "u" | "R" | "U" | "f" | "F"
+   stringprefix: "r" | "u" | "R" | "U" | "f" | "F" | "t" | "T"
                : | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF"
+               : | "tr" | "Tr" | "tR" | "TR" | "rt" | "rT" | "Rt" | "RT"
    shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"'
    longstring: "'''" `longstringitem`* "'''" | '"""' `longstringitem`* '"""'
    shortstringitem: `shortstringchar` | `stringescapeseq`
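
The new grammar alternatives correspond to PEP 750 template string literals
(new in Python 3.14). A minimal sketch of the spellings the updated production
accepts, assuming Python 3.14+ where t-strings evaluate to
string.templatelib.Template:

    from string.templatelib import Template

    name = "world"
    tpl = t"hello {name}"       # bare "t"/"T" prefix
    raw = rt"C:\temp {name}"    # "rt"/"tr" permutations combine raw + template
    assert isinstance(tpl, Template)
    assert isinstance(raw, Template)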
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index e6b19fe1812d44..d4b51841891b28 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1,6 +1,8 @@
 import contextlib
+import itertools
 import os
 import re
+import string
 import tempfile
 import token
 import tokenize
@@ -3238,5 +3240,59 @@ def test_exact_flag(self):
             self.check_output(source, expect, flag)
 
 
+class StringPrefixTest(unittest.TestCase):
+    def test_prefixes(self):
+        # Get the list of defined string prefixes. I don't see an
+        # obvious documented way of doing this, but probably the best
+        # thing is to split apart tokenize.StringPrefix.
+
+        # Make sure StringPrefix begins and ends in parens.
+        self.assertEqual(tokenize.StringPrefix[0], '(')
+        self.assertEqual(tokenize.StringPrefix[-1], ')')
+
+        # Then split apart everything else by '|'.
+        defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|'))
+
+        # Now compute the actual string prefixes, by exec-ing all
+        # valid prefix combinations, followed by an empty string.
+
+        # Try all prefix lengths until we find a length that has zero
+        # valid prefixes. This will miss the case where for example
+        # there are no valid 3 character prefixes, but there are valid
+        # 4 character prefixes. That seems extremely unlikely.
+
+        # Note that the empty prefix is being included, because length
+        # starts at 0. That's expected, since StringPrefix includes
+        # the empty prefix.
+
+        valid_prefixes = set()
+        for length in itertools.count():
+            num_at_this_length = 0
+            for prefix in (
+                "".join(l) for l in list(itertools.combinations(string.ascii_lowercase, length))
+            ):
+                for t in itertools.permutations(prefix):
+                    for u in itertools.product(*[(c, c.upper()) for c in t]):
+                        p = ''.join(u)
+                        if p == "not":
+                            # 'not' can never be a string prefix,
+                            # because it's a valid expression: not ""
+                            continue
+                        try:
+                            eval(f'{p}""')
+
+                            # No syntax error, so p is a valid string
+                            # prefix.
+
+                            valid_prefixes.add(p)
+                            num_at_this_length += 1
+                        except SyntaxError:
+                            pass
+            if num_at_this_length == 0:
+                break
+
+        self.assertEqual(defined_prefixes, valid_prefixes)
+
+
 if __name__ == "__main__":
     unittest.main()
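
The nested permutations/product loops in the new test mirror how tokenize's
_all_string_prefixes() expands each lowercase seed prefix into every ordering
and casing. A standalone sketch of that expansion (illustrative only; expand()
is not part of the module):

    import itertools

    def expand(prefix):
        # 'tr' -> {'tr', 'tR', 'Tr', 'TR', 'rt', 'rT', 'Rt', 'RT'}
        return {
            ''.join(cased)
            for perm in itertools.permutations(prefix)
            for cased in itertools.product(*[(c, c.upper()) for c in perm])
        }

    print(sorted(expand('tr')))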
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 559a7aecbde2d1..7e71755068e1df 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -86,7 +86,7 @@ def _all_string_prefixes():
     # The valid string prefixes. Only contain the lower case versions,
     # and don't contain any permutations (include 'fr', but not
     # 'rf'). The various permutations will be generated.
-    _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr']
+    _valid_string_prefixes = ['b', 'r', 'u', 'f', 't', 'br', 'fr', 'tr']
     # if we add binary f-strings, add: ['fb', 'fbr']
     result = {''}
     for prefix in _valid_string_prefixes:
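
With 't' and 'tr' added to the seed list, the generated StringPrefix
alternation gains ten new spellings. A quick check, splitting the '(a|b|...)'
fragment apart the same way the new test does (assumes a build with this
commit):

    import tokenize

    prefixes = set(tokenize.StringPrefix[1:-1].split('|'))
    print(sorted(p for p in prefixes if 't' in p.lower()))
    # -> the ten t-spellings: 't'/'T' plus all casings of 'tr' and 'rt'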