Author: Amaury Forgeot d'Arc <[email protected]>
Branch: SomeString-charclass
Changeset: r72439:85c3cda1fc4d
Date: 2014-07-13 20:24 +0200
http://bitbucket.org/pypy/pypy/changeset/85c3cda1fc4d/
Log: Add Utf8Char kind. Will be used by space.identifier_w()
diff --git a/rpython/annotator/model.py b/rpython/annotator/model.py
--- a/rpython/annotator/model.py
+++ b/rpython/annotator/model.py
@@ -223,6 +223,7 @@
"""A character of any value."""
no_nul = False
is_ascii = False
+ is_utf8 = False
_instances = {}
@@ -259,7 +260,24 @@
return AnyChar()
NoNulChar._register()
-class AsciiChar(NoNulChar):
+class Utf8Char(NoNulChar):
+ """A character compatible with utf8 encoding.
+
+ Does not mean that the string can always be decoded with utf8,
+ especially for slices or single characters. This kind indicates that
+ utf8 is the encoding to use when converting to unicode."""
+ is_utf8 = True
+
+ def union(self, other):
+ if other.is_utf8:
+ return self
+ elif other.no_nul:
+ return NoNulChar()
+ else:
+ return AnyChar()
+Utf8Char._register()
+
+class AsciiChar(Utf8Char):
"""A character in the range(1, 128).
Strings of this kind can be decoded faster to unicode."""
@@ -269,6 +287,8 @@
def union(self, other):
if other.is_ascii:
return self
+ elif other.is_utf8:
+ return Utf8Char()
elif other.no_nul:
return NoNulChar()
else:
@@ -277,6 +297,8 @@
def charkind_from_const(value):
+ # Probably no need to handle utf-8, we don't have such constants
+ # in pypy code.
try:
value.decode('ascii')
except UnicodeDecodeError:
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit