New submission from Florent Xicluna <florent.xicl...@gmail.com>:

from io import BytesIO
from tokenize import tokenize, tok_name

# Reproduction source: an identifier that begins with a non-ASCII letter,
# kept both as text and as its UTF-8 encoded bytes.
sample = 'éléphants = "un éléphant, deux éléphants, ..."\nprint(éléphants)\n'
sampleb = sample.encode('utf-8')

# Running the source directly works for the text and the bytes form alike.
exec(sample)
# output: un éléphant, deux éléphants, ...
exec(sampleb)
# output: un éléphant, deux éléphants, ...

# Hand the encoded source to tokenize through an in-memory binary stream,
# positioned at the start.
module = BytesIO(sampleb)

# Print each token's symbolic type name followed by the full TokenInfo.
for token_info in tokenize(module.readline):
    print(tok_name[token_info.type], token_info)


# output:
ENCODING TokenInfo(type=57, string='utf-8', start=(0, 0), end=(0, 0), line='')
ERRORTOKEN TokenInfo(type=54, string='é', start=(1, 0), end=(1, 1), line='éléphants = "un éléphant, deux éléphants, ..."\n')
NAME TokenInfo(type=1, string='léphants', start=(1, 1), end=(1, 9), line='éléphants = "un éléphant, deux éléphants, ..."\n')
OP TokenInfo(type=53, string='=', start=(1, 10), end=(1, 11), line='éléphants = "un éléphant, deux éléphants, ..."\n')
STRING TokenInfo(type=3, string='"un éléphant, deux éléphants, ..."', start=(1, 12), end=(1, 46), line='éléphants = "un éléphant, deux éléphants, ..."\n')
NEWLINE TokenInfo(type=4, string='\n', start=(1, 46), end=(1, 47), line='éléphants = "un éléphant, deux éléphants, ..."\n')
NAME TokenInfo(type=1, string='print', start=(2, 0), end=(2, 5), line='print(éléphants)\n')
OP TokenInfo(type=53, string='(', start=(2, 5), end=(2, 6), line='print(éléphants)\n')
ERRORTOKEN TokenInfo(type=54, string='é', start=(2, 6), end=(2, 7), line='print(éléphants)\n')
NAME TokenInfo(type=1, string='léphants', start=(2, 7), end=(2, 15), line='print(éléphants)\n')
OP TokenInfo(type=53, string=')', start=(2, 15), end=(2, 16), line='print(éléphants)\n')
NEWLINE TokenInfo(type=4, string='\n', start=(2, 16), end=(2, 17), line='print(éléphants)\n')
ENDMARKER TokenInfo(type=0, string='', start=(3, 0), end=(3, 0), line='')

----------
messages: 115201
nosy: flox
priority: normal
severity: normal
stage: unit test needed
status: open
title: tokenize yields an ERRORTOKEN if the identifier starts with a non-ASCII char
type: behavior
versions: Python 3.1, Python 3.2

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue9712>
_______________________________________
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to