Tal Einat added the comment:
Alright, so I'm going to use the equivalent of the following code, unless
someone can tell me that something is wrong:
import string
from keyword import iskeyword
from unicodedata import category, normalize
# Unicode general categories (the two-letter codes returned by
# unicodedata.category()) of characters that may start an identifier.
# NOTE: "Other_ID_Start" and "Other_ID_Continue" are derived Unicode
# properties, not general categories; category() never returns them, so
# listing them in these sets had no effect.  They are deliberately omitted;
# characters carrying those properties must be recognised some other way
# (e.g. via str.isidentifier()).
_ID_FIRST_CATEGORIES = frozenset({"Lu", "Ll", "Lt", "Lm", "Lo", "Nl"})

# General categories of characters that may appear anywhere in an identifier.
_ID_CATEGORIES = _ID_FIRST_CATEGORIES | {"Mn", "Mc", "Nd", "Pc"}

# Every ASCII character that may appear in an identifier.
_ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_")

# Names that keyword.iskeyword() reports as keywords but which are still
# accepted as identifiers by is_identifier().
_ID_KEYWORDS = frozenset({"True", "False", "None"})
def is_id_char(char):
    """Return True if the single character *char* may appear in an identifier.

    The check is for a *continuation* position: digits are accepted even
    though they cannot start an identifier.

    ASCII characters are answered from the ``_ASCII_ID_CHARS`` lookup set.
    Any other character is handed to ``str.isidentifier()``, prefixed with
    a known-valid start character so that only its suitability as a
    continuation character is tested.

    Raises TypeError (from ``ord``) if *char* is not in the ASCII set and
    is not a string of length one.
    """
    if char in _ASCII_ID_CHARS:
        return True
    # The previous implementation called unicodedata.normalize(char) without
    # the mandatory normalization-form argument, which raised TypeError for
    # every non-ASCII character.  Delegating to str.isidentifier() avoids
    # re-implementing the interpreter's identifier rules by hand.
    return ord(char) >= 128 and ("a" + char).isidentifier()
def is_identifier(id_candidate):
    """Return True if *id_candidate* should be treated as an identifier.

    The string must satisfy ``str.isidentifier()``.  Keywords are rejected,
    except for the names listed in ``_ID_KEYWORDS``, which are accepted.
    """
    if not id_candidate.isidentifier():
        return False
    if id_candidate in _ID_KEYWORDS:
        # Explicitly allowed even though iskeyword() may report a keyword.
        return True
    return not iskeyword(id_candidate)
def _eat_identifier(str, limit, pos):
    """Return the length of the identifier that ends just before *pos*.

    Scans *str* backwards from index *pos*, never going below index
    *limit*, for as long as the preceding character satisfies
    ``is_id_char``.  If the scanned text is non-empty but is rejected by
    ``is_identifier``, 0 is returned; otherwise the number of characters
    scanned is returned (which is 0 when nothing was scanned).

    The parameter name ``str`` shadows the builtin; it is kept unchanged so
    that existing callers are unaffected.
    """
    i = pos
    # str[i - 1] is the character immediately before the cursor.  The
    # previous code indexed str[pos - i], which starts at str[0] and moves
    # forward -- unrelated to the text that precedes pos.
    while i > limit and is_id_char(str[i - 1]):
        i -= 1
    if i < pos and not is_identifier(str[i:pos]):
        return 0
    return pos - i
----------
_______________________________________
Python tracker <[email protected]>
<http://bugs.python.org/issue21765>
_______________________________________
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com