Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r44709:ab73d694925f Date: 2011-06-05 11:06 +0200 http://bitbucket.org/pypy/pypy/changeset/ab73d694925f/
Log: errors="replace" in decode. diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ b/pypy/module/_multibytecodec/c_codecs.py @@ -104,7 +104,8 @@ pypy_cjk_dec_inbuf_consumed = llexternal('pypy_cjk_dec_inbuf_consumed', [DECODEBUF_P], rffi.SSIZE_T) pypy_cjk_dec_inbuf_add = llexternal('pypy_cjk_dec_inbuf_add', - [DECODEBUF_P, rffi.SSIZE_T], lltype.Void) + [DECODEBUF_P, rffi.SSIZE_T, rffi.INT], + rffi.INT) def decode(codec, stringdata, errors="strict"): inleft = len(stringdata) @@ -141,9 +142,13 @@ else: raise RuntimeError # - # if errors == ERROR_REPLACE:... - if errors == "ignore": # or errors == ERROR_REPLACE - pypy_cjk_dec_inbuf_add(decodebuf, esize) + if errors == "ignore": + pypy_cjk_dec_inbuf_add(decodebuf, esize, 0) + return # continue decoding + if errors == "replace": + e = pypy_cjk_dec_inbuf_add(decodebuf, esize, 1) + if e == MBERR_NOMEMORY: + raise MemoryError return # continue decoding start = pypy_cjk_dec_inbuf_consumed(decodebuf) end = start + esize diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py b/pypy/module/_multibytecodec/test/test_app_codecs.py --- a/pypy/module/_multibytecodec/test/test_app_codecs.py +++ b/pypy/module/_multibytecodec/test/test_app_codecs.py @@ -44,6 +44,14 @@ r = codec.decode("def~{}abc", 'ignore') assert r == (u'def\u5fcf', 9) + def test_decode_hz_replace(self): + import _codecs_cn + codec = _codecs_cn.getcodec("hz") + r = codec.decode("def~{}abc", errors='replace') + assert r == (u'def\ufffd\u5fcf', 9) + r = codec.decode("def~{}abc", 'replace') + assert r == (u'def\ufffd\u5fcf', 9) + def test_encode_hz(self): import _codecs_cn codec = _codecs_cn.getcodec("hz") diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py --- a/pypy/module/_multibytecodec/test/test_c_codecs.py +++ b/pypy/module/_multibytecodec/test/test_c_codecs.py @@ -41,6 +41,11 @@ u = decode(c, 'def~{}abc', 'ignore') assert u == u'def\u5fcf' +def test_decode_hz_replace(): + c = getcodec("hz") + u = decode(c, 'def~{}abc', 'replace') + assert u == u'def\ufffd\u5fcf' + def test_encode_hz(): c = getcodec("hz") s = encode(c, u'foobar') diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c b/pypy/translator/c/src/cjkcodecs/multibytecodec.c --- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c +++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c @@ -1,6 +1,8 @@ #include <stdlib.h> #include "src/cjkcodecs/multibytecodec.h" +#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD) + struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec, char *inbuf, Py_ssize_t inlen) @@ -93,9 +95,18 @@ return d->inbuf - d->inbuf_start; } -void pypy_cjk_dec_inbuf_add(struct pypy_cjk_dec_s* d, Py_ssize_t skip) +int pypy_cjk_dec_inbuf_add(struct pypy_cjk_dec_s* d, Py_ssize_t skip, + int add_replacement_character) { + if (add_replacement_character) + { + if (d->outbuf >= d->outbuf_end) + if (expand_decodebuffer(d, 1) == -1) + return MBERR_NOMEMORY; + *d->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER; + } d->inbuf += skip; + return 0; } /************************************************************/ diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h b/pypy/translator/c/src/cjkcodecs/multibytecodec.h --- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h +++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h @@ -102,7 +102,7 @@ Py_ssize_t pypy_cjk_dec_outlen(struct pypy_cjk_dec_s *); Py_ssize_t pypy_cjk_dec_inbuf_remaining(struct pypy_cjk_dec_s *d); Py_ssize_t pypy_cjk_dec_inbuf_consumed(struct pypy_cjk_dec_s* d); -void pypy_cjk_dec_inbuf_add(struct pypy_cjk_dec_s*, Py_ssize_t); +int pypy_cjk_dec_inbuf_add(struct pypy_cjk_dec_s*, Py_ssize_t, int); struct pypy_cjk_enc_s { const MultibyteCodec *codec; _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit