Author: fijal
Branch: unicode-utf8
Changeset: r90456:777001a4a191
Date: 2017-03-01 16:52 +0100
http://bitbucket.org/pypy/pypy/changeset/777001a4a191/
Log: "fix" multibytecodec
diff --git a/pypy/module/_multibytecodec/c_codecs.py
b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -3,7 +3,7 @@
from rpython.translator.tool.cbuild import ExternalCompilationInfo
from rpython.translator import cdir
-UNICODE_REPLACEMENT_CHARACTER = u'\uFFFD'
+UNICODE_REPLACEMENT_CHARACTER = u'\uFFFD'.encode("utf8")
class EncodeDecodeError(Exception):
@@ -148,15 +148,17 @@
if errors == "strict":
raise EncodeDecodeError(start, end, reason)
elif errors == "ignore":
- replace = u""
+ replace = ""
+ lgt = 0
elif errors == "replace":
replace = UNICODE_REPLACEMENT_CHARACTER
+ lgt = 1
else:
assert errorcb
- replace, end = errorcb(errors, namecb, reason,
+ replace, end, lgt = errorcb(errors, namecb, reason,
stringdata, start, end)
- with rffi.scoped_nonmoving_unicodebuffer(replace) as inbuf:
- r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, len(replace), end)
+ with rffi.scoped_nonmoving_unicodebuffer(replace.decode("utf8")) as inbuf:
+ r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, lgt, end)
if r == MBERR_NOMEMORY:
raise MemoryError
@@ -255,15 +257,15 @@
replace = "?"
else:
assert errorcb
- retu, rets, end = errorcb(errors, namecb, reason,
- unicodedata, start, end)
+ retu, rets, end, lgt = errorcb(errors, namecb, reason,
+ unicodedata.encode("utf8"), start, end)
if rets is not None:
# py3k only
replace = rets
else:
assert retu is not None
codec = pypy_cjk_enc_getcodec(encodebuf)
- replace = encode(codec, retu, "strict", errorcb, namecb)
+ replace = encode(codec, retu.decode("utf8"), "strict", errorcb,
namecb)
with rffi.scoped_nonmovingbuffer(replace) as inbuf:
r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end)
if r == MBERR_NOMEMORY:
diff --git a/pypy/module/_multibytecodec/interp_incremental.py
b/pypy/module/_multibytecodec/interp_incremental.py
--- a/pypy/module/_multibytecodec/interp_incremental.py
+++ b/pypy/module/_multibytecodec/interp_incremental.py
@@ -96,8 +96,9 @@
c_codecs.pypy_cjk_enc_free(self.encodebuf)
self.encodebuf = lltype.nullptr(c_codecs.ENCODEBUF_P.TO)
- @unwrap_spec(object=unicode, final=bool)
- def encode_w(self, object, final=False):
+ @unwrap_spec(utf8object='utf8', final=bool)
+ def encode_w(self, utf8object, objlen, final=False):
+ object = utf8object.decode('utf8')
space = self.space
state = space.fromcache(CodecState)
if len(self.pending) > 0:
@@ -107,7 +108,7 @@
state.encode_error_handler, self.name,
get_ignore_error(final))
except c_codecs.EncodeDecodeError as e:
- raise wrap_unicodeencodeerror(space, e, object, self.name)
+ raise wrap_unicodeencodeerror(space, e, utf8object, self.name)
except RuntimeError:
raise wrap_runtimeerror(space)
pos = c_codecs.pypy_cjk_enc_inbuf_consumed(self.encodebuf)
diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py
b/pypy/module/_multibytecodec/interp_multibytecodec.py
--- a/pypy/module/_multibytecodec/interp_multibytecodec.py
+++ b/pypy/module/_multibytecodec/interp_multibytecodec.py
@@ -18,30 +18,30 @@
state = space.fromcache(CodecState)
#
try:
- output = c_codecs.decode(self.codec, input, errors,
+ u_output = c_codecs.decode(self.codec, input, errors,
state.decode_error_handler, self.name)
except c_codecs.EncodeDecodeError as e:
raise wrap_unicodedecodeerror(space, e, input, self.name)
except RuntimeError:
raise wrap_runtimeerror(space)
- return space.newtuple([space.newunicode(output),
+ return space.newtuple([space.newunicode(u_output),
space.newint(len(input))])
- @unwrap_spec(input=unicode, errors="str_or_None")
- def encode(self, space, input, errors=None):
+ @unwrap_spec(input='utf8', errors="str_or_None")
+ def encode(self, space, input, inputlen, errors=None):
if errors is None:
errors = 'strict'
state = space.fromcache(CodecState)
#
try:
- output = c_codecs.encode(self.codec, input, errors,
+ output = c_codecs.encode(self.codec, input.decode('utf8'), errors,
state.encode_error_handler, self.name)
except c_codecs.EncodeDecodeError as e:
raise wrap_unicodeencodeerror(space, e, input, self.name)
except RuntimeError:
raise wrap_runtimeerror(space)
return space.newtuple([space.newbytes(output),
- space.newint(len(input))])
+ space.newint(inputlen)])
MultibyteCodec.typedef = TypeDef(
@@ -76,7 +76,7 @@
space.w_UnicodeEncodeError,
space.newtuple([
space.newtext(name),
- space.newunicode(input),
+ space.newutf8(input, -1),
space.newint(e.start),
space.newint(e.end),
space.newtext(e.reason)]))
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py
b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -126,6 +126,6 @@
def test_encode_custom_error_handler_bytes():
c = getcodec("hz")
def errorhandler(errors, enc, msg, t, startingpos, endingpos):
- return None, '\xc3', endingpos
+ return None, '\xc3', endingpos, -1
s = encode(c, u'abc\u1234def', 'foo', errorhandler)
assert '\xc3' in s
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit