Serhiy Storchaka <storch...@gmail.com> added the comment:
Patches updated with tests.
----------
Added file: http://bugs.python.org/file26031/tokenize_bytes_py2-2.patch
Added file: http://bugs.python.org/file26032/tokenize_bytes_py3-2.patch
_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue15054>
_______________________________________
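
For reference, a minimal way to see the misbehaviour being fixed (a sketch; the "unpatched" output is what the old patterns should produce, and on 2.7 the same check works with StringIO.StringIO):

import io
import tokenize

# Unpatched, the prefix is expected to come back as its own NAME token:
#     NAME 'b', then STRING "'abc'"
# Patched, the literal is a single token, as the new tests below assert:
#     STRING "b'abc'"
for tok in tokenize.generate_tokens(io.StringIO("b'abc'\n").readline):
    print(tok[:2])
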
diff -r 412c7daed0db Lib/test/test_tokenize.py
--- a/Lib/test/test_tokenize.py Thu Jun 14 00:36:54 2012 +0200
+++ b/Lib/test/test_tokenize.py Sat Jun 16 23:28:52 2012 +0300
@@ -278,6 +278,31 @@
OP '+' (1, 32) (1, 33)
STRING 'UR"ABC"' (1, 34) (1, 41)
+ >>> dump_tokens("b'abc' + B'abc'")
+ STRING "b'abc'" (1, 0) (1, 6)
+ OP '+' (1, 7) (1, 8)
+ STRING "B'abc'" (1, 9) (1, 15)
+ >>> dump_tokens('b"abc" + B"abc"')
+ STRING 'b"abc"' (1, 0) (1, 6)
+ OP '+' (1, 7) (1, 8)
+ STRING 'B"abc"' (1, 9) (1, 15)
+ >>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")
+ STRING "br'abc'" (1, 0) (1, 7)
+ OP '+' (1, 8) (1, 9)
+ STRING "bR'abc'" (1, 10) (1, 17)
+ OP '+' (1, 18) (1, 19)
+ STRING "Br'abc'" (1, 20) (1, 27)
+ OP '+' (1, 28) (1, 29)
+ STRING "BR'abc'" (1, 30) (1, 37)
+ >>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')
+ STRING 'br"abc"' (1, 0) (1, 7)
+ OP '+' (1, 8) (1, 9)
+ STRING 'bR"abc"' (1, 10) (1, 17)
+ OP '+' (1, 18) (1, 19)
+ STRING 'Br"abc"' (1, 20) (1, 27)
+ OP '+' (1, 28) (1, 29)
+ STRING 'BR"abc"' (1, 30) (1, 37)
+
Operators
>>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
diff -r 412c7daed0db Lib/tokenize.py
--- a/Lib/tokenize.py Thu Jun 14 00:36:54 2012 +0200
+++ b/Lib/tokenize.py Sat Jun 16 23:28:52 2012 +0300
@@ -70,10 +70,10 @@
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
-Triple = group("[uU]?[rR]?'''", '[uU]?[rR]?"""')
+Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""')
# Single-line ' or " string.
-String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
- r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+String = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+ r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
@@ -91,9 +91,9 @@
Token = Ignore + PlainToken
# First (or only) line of ' or " string.
-ContStr = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
group("'", r'\\\r?\n'),
- r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+ r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
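
A quick standalone check of the widened prefix class in the 2.7 patch, with group() redefined locally the same way Lib/tokenize.py defines it (a sketch, not part of the patch):

import re

def group(*choices):
    return '(' + '|'.join(choices) + ')'

# Single-line string pattern before and after the hunk above.
old = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
            r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
new = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
            r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

assert re.match(old, "b'abc'") is None       # the bug: prefix not consumed
for lit in ["'abc'", "b'abc'", "BR'abc'", 'uR"abc"']:
    assert re.match(new, lit), lit           # all 2.7 prefixes now match
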
diff -r f6463dc5ead6 Lib/test/test_tokenize.py
--- a/Lib/test/test_tokenize.py Sat Jun 16 16:38:26 2012 +0200
+++ b/Lib/test/test_tokenize.py Sat Jun 16 23:28:44 2012 +0300
@@ -289,6 +289,82 @@
OP '+' (1, 29) (1, 30)
STRING 'R"ABC"' (1, 31) (1, 37)
+ >>> dump_tokens("u'abc' + U'abc'")
+ ENCODING 'utf-8' (0, 0) (0, 0)
+ STRING "u'abc'" (1, 0) (1, 6)
+ OP '+' (1, 7) (1, 8)
+ STRING "U'abc'" (1, 9) (1, 15)
+ >>> dump_tokens('u"abc" + U"abc"')
+ ENCODING 'utf-8' (0, 0) (0, 0)
+ STRING 'u"abc"' (1, 0) (1, 6)
+ OP '+' (1, 7) (1, 8)
+ STRING 'U"abc"' (1, 9) (1, 15)
+ >>> dump_tokens("ur'abc' + uR'abc' + Ur'abc' + UR'abc'")
+ ENCODING 'utf-8' (0, 0) (0, 0)
+ STRING "ur'abc'" (1, 0) (1, 7)
+ OP '+' (1, 8) (1, 9)
+ STRING "uR'abc'" (1, 10) (1, 17)
+ OP '+' (1, 18) (1, 19)
+ STRING "Ur'abc'" (1, 20) (1, 27)
+ OP '+' (1, 28) (1, 29)
+ STRING "UR'abc'" (1, 30) (1, 37)
+ >>> dump_tokens('ur"abc" + uR"abc" + Ur"abc" + UR"abc"')
+ ENCODING 'utf-8' (0, 0) (0, 0)
+ STRING 'ur"abc"' (1, 0) (1, 7)
+ OP '+' (1, 8) (1, 9)
+ STRING 'uR"abc"' (1, 10) (1, 17)
+ OP '+' (1, 18) (1, 19)
+ STRING 'Ur"abc"' (1, 20) (1, 27)
+ OP '+' (1, 28) (1, 29)
+ STRING 'UR"abc"' (1, 30) (1, 37)
+
+ >>> dump_tokens("b'abc' + B'abc'")
+ ENCODING 'utf-8' (0, 0) (0, 0)
+ STRING "b'abc'" (1, 0) (1, 6)
+ OP '+' (1, 7) (1, 8)
+ STRING "B'abc'" (1, 9) (1, 15)
+ >>> dump_tokens('b"abc" + B"abc"')
+ ENCODING 'utf-8' (0, 0) (0, 0)
+ STRING 'b"abc"' (1, 0) (1, 6)
+ OP '+' (1, 7) (1, 8)
+ STRING 'B"abc"' (1, 9) (1, 15)
+ >>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")
+ ENCODING 'utf-8' (0, 0) (0, 0)
+ STRING "br'abc'" (1, 0) (1, 7)
+ OP '+' (1, 8) (1, 9)
+ STRING "bR'abc'" (1, 10) (1, 17)
+ OP '+' (1, 18) (1, 19)
+ STRING "Br'abc'" (1, 20) (1, 27)
+ OP '+' (1, 28) (1, 29)
+ STRING "BR'abc'" (1, 30) (1, 37)
+ >>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')
+ ENCODING 'utf-8' (0, 0) (0, 0)
+ STRING 'br"abc"' (1, 0) (1, 7)
+ OP '+' (1, 8) (1, 9)
+ STRING 'bR"abc"' (1, 10) (1, 17)
+ OP '+' (1, 18) (1, 19)
+ STRING 'Br"abc"' (1, 20) (1, 27)
+ OP '+' (1, 28) (1, 29)
+ STRING 'BR"abc"' (1, 30) (1, 37)
+ >>> dump_tokens("rb'abc' + rB'abc' + Rb'abc' + RB'abc'")
+ ENCODING 'utf-8' (0, 0) (0, 0)
+ STRING "rb'abc'" (1, 0) (1, 7)
+ OP '+' (1, 8) (1, 9)
+ STRING "rB'abc'" (1, 10) (1, 17)
+ OP '+' (1, 18) (1, 19)
+ STRING "Rb'abc'" (1, 20) (1, 27)
+ OP '+' (1, 28) (1, 29)
+ STRING "RB'abc'" (1, 30) (1, 37)
+ >>> dump_tokens('rb"abc" + rB"abc" + Rb"abc" + RB"abc"')
+ ENCODING 'utf-8' (0, 0) (0, 0)
+ STRING 'rb"abc"' (1, 0) (1, 7)
+ OP '+' (1, 8) (1, 9)
+ STRING 'rB"abc"' (1, 10) (1, 17)
+ OP '+' (1, 18) (1, 19)
+ STRING 'Rb"abc"' (1, 20) (1, 27)
+ OP '+' (1, 28) (1, 29)
+ STRING 'RB"abc"' (1, 30) (1, 37)
+
Operators
>>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
diff -r f6463dc5ead6 Lib/tokenize.py
--- a/Lib/tokenize.py Sat Jun 16 16:38:26 2012 +0200
+++ b/Lib/tokenize.py Sat Jun 16 23:28:44 2012 +0300
@@ -127,7 +127,7 @@
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)
-StringPrefix = r'(?:[uU][rR]?|[bB][rR]|[rR][bB]|[rR]|[uU])?'
+StringPrefix = r'(?:[uUbB][rR]?|[rR][bB]?)?'
# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
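
And a standalone check that the simplified StringPrefix in the 3.x patch accepts exactly the prefixes the new doctests exercise (plus bare r/R and the empty prefix) while still rejecting illegal orderings (again a sketch, not part of the patch):

import re

# Rewritten prefix class from the hunk above, anchored for a full match.
pat = re.compile(r'(?:[uUbB][rR]?|[rR][bB]?)?$')

legal = ['', 'u', 'U', 'ur', 'uR', 'Ur', 'UR', 'b', 'B',
         'br', 'bR', 'Br', 'BR', 'rb', 'rB', 'Rb', 'RB', 'r', 'R']
for p in legal:
    assert pat.match(p), p

for p in ['ru', 'ub', 'bu', 'rr', 'uu']:     # illegal orderings stay rejected
    assert pat.match(p) is None, p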