https://github.com/python/cpython/commit/ef66fb597ba909ead2fbfc06f748aa7b7e9ea437
commit: ef66fb597ba909ead2fbfc06f748aa7b7e9ea437
branch: main
author: Pablo Galindo Salgado <[email protected]>
committer: pablogsal <[email protected]>
date: 2025-07-16T11:47:13+02:00
summary:
gh-135148: Correctly handle f/t strings with comments and debug expressions
(#135198)
files:
A
Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst
M Lib/test/test_fstring.py
M Parser/lexer/lexer.c
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index 58a30c8e6ac447..b41e02c3a16379 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -1651,6 +1651,18 @@ def __repr__(self):
self.assertEqual(f"{1+2 = # my comment
}", '1+2 = \n 3')
+ self.assertEqual(f'{""" # booo
+ """=}', '""" # booo\n """=\' # booo\\n \'')
+
+ self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'')
+ self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " #
nooo " \'')
+
+ self.assertEqual(f'{ # some comment goes here
+ """hello"""=}', ' \n """hello"""=\'hello\'')
+ self.assertEqual(f'{"""# this is not a comment
+ a""" # this is a comment
+ }', '# this is not a comment\n a')
+
# These next lines contain tabs. Backslash escapes don't
# work in f-strings.
# patchcheck doesn't like these tabs. So the only way to test
diff --git
a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst
b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst
new file mode 100644
index 00000000000000..9b1f62433b45ed
--- /dev/null
+++
b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst
@@ -0,0 +1,3 @@
+Fixed a bug where f-string debug expressions (using =) would incorrectly
+strip out parts of strings containing escaped quotes and # characters. Patch
+by Pablo Galindo.
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 0a078dd594148c..81363cf8e810fe 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -121,38 +121,88 @@ set_ftstring_expr(struct tok_state* tok, struct token
*token, char c) {
}
PyObject *res = NULL;
- // Check if there is a # character in the expression
+ // Look for a # character outside of string literals
int hash_detected = 0;
+ int in_string = 0;
+ char quote_char = 0;
+
for (Py_ssize_t i = 0; i < tok_mode->last_expr_size -
tok_mode->last_expr_end; i++) {
- if (tok_mode->last_expr_buffer[i] == '#') {
+ char ch = tok_mode->last_expr_buffer[i];
+
+ // Skip escaped characters
+ if (ch == '\\') {
+ i++;
+ continue;
+ }
+
+ // Handle quotes
+ if (ch == '"' || ch == '\'') {
+ // The following if/else block works because there is an odd number
+ // of quotes in STRING tokens and the lexer only ever reaches this
+ // function with valid STRING tokens.
+ // For example: """hello"""
+ // First quote: in_string = 1
+ // Second quote: in_string = 0
+ // Third quote: in_string = 1
+ if (!in_string) {
+ in_string = 1;
+ quote_char = ch;
+ }
+ else if (ch == quote_char) {
+ in_string = 0;
+ }
+ continue;
+ }
+
+ // Check for # outside strings
+ if (ch == '#' && !in_string) {
hash_detected = 1;
break;
}
}
-
+ // If we found a # character in the expression, we need to handle comments
if (hash_detected) {
- Py_ssize_t input_length = tok_mode->last_expr_size -
tok_mode->last_expr_end;
- char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char));
+ // Allocate buffer for processed result
+ char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size -
tok_mode->last_expr_end + 1) * sizeof(char));
if (!result) {
return -1;
}
- Py_ssize_t i = 0;
- Py_ssize_t j = 0;
+ Py_ssize_t i = 0; // Input position
+ Py_ssize_t j = 0; // Output position
+ in_string = 0; // Whether we're in a string
+ quote_char = 0; // Current string quote char
- for (i = 0, j = 0; i < input_length; i++) {
- if (tok_mode->last_expr_buffer[i] == '#') {
- // Skip characters until newline or end of string
- while (i < input_length && tok_mode->last_expr_buffer[i] !=
'\0') {
- if (tok_mode->last_expr_buffer[i] == '\n') {
- result[j++] = tok_mode->last_expr_buffer[i];
- break;
- }
+ // Process each character
+ while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
+ char ch = tok_mode->last_expr_buffer[i];
+
+ // Handle string quotes
+ if (ch == '"' || ch == '\'') {
+ // See comment above to understand this part
+ if (!in_string) {
+ in_string = 1;
+ quote_char = ch;
+ } else if (ch == quote_char) {
+ in_string = 0;
+ }
+ result[j++] = ch;
+ }
+ // Skip comments
+ else if (ch == '#' && !in_string) {
+ while (i < tok_mode->last_expr_size - tok_mode->last_expr_end
&&
+ tok_mode->last_expr_buffer[i] != '\n') {
i++;
}
- } else {
- result[j++] = tok_mode->last_expr_buffer[i];
+ if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
+ result[j++] = '\n';
+ }
+ }
+ // Copy other chars
+ else {
+ result[j++] = ch;
}
+ i++;
}
result[j] = '\0'; // Null-terminate the result string
@@ -164,11 +214,9 @@ set_ftstring_expr(struct tok_state* tok, struct token
*token, char c) {
tok_mode->last_expr_size - tok_mode->last_expr_end,
NULL
);
-
}
-
- if (!res) {
+ if (!res) {
return -1;
}
token->metadata = res;
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]