Author: Armin Rigo <[email protected]>
Branch:
Changeset: r46143:f040a9a3f4fb
Date: 2011-07-31 18:16 +0200
http://bitbucket.org/pypy/pypy/changeset/f040a9a3f4fb/
Log: Incremental support: keep the decodebuf around across several calls to
decodeex(), and don't complain when getting MBERR_TOOFEW.
diff --git a/pypy/module/_multibytecodec/c_codecs.py
b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -52,11 +52,13 @@
includes = ['src/cjkcodecs/multibytecodec.h'],
include_dirs = [str(srcdir)],
export_symbols = [
+ "pypy_cjk_dec_new",
"pypy_cjk_dec_init", "pypy_cjk_dec_free", "pypy_cjk_dec_chunk",
"pypy_cjk_dec_outbuf", "pypy_cjk_dec_outlen",
"pypy_cjk_dec_inbuf_remaining", "pypy_cjk_dec_inbuf_consumed",
"pypy_cjk_dec_replace_on_error",
+ "pypy_cjk_enc_new",
"pypy_cjk_enc_init", "pypy_cjk_enc_free", "pypy_cjk_enc_chunk",
"pypy_cjk_enc_reset", "pypy_cjk_enc_outbuf", "pypy_cjk_enc_outlen",
"pypy_cjk_enc_inbuf_remaining", "pypy_cjk_enc_inbuf_consumed",
@@ -92,9 +94,11 @@
# Decoding
DECODEBUF_P = rffi.COpaquePtr('struct pypy_cjk_dec_s', compilation_info=eci)
+pypy_cjk_dec_new = llexternal('pypy_cjk_dec_new',
+ [MULTIBYTECODEC_P], DECODEBUF_P)
pypy_cjk_dec_init = llexternal('pypy_cjk_dec_init',
- [MULTIBYTECODEC_P, rffi.CCHARP, rffi.SSIZE_T],
- DECODEBUF_P)
+ [DECODEBUF_P, rffi.CCHARP, rffi.SSIZE_T],
+ rffi.SSIZE_T)
pypy_cjk_dec_free = llexternal('pypy_cjk_dec_free', [DECODEBUF_P],
lltype.Void)
pypy_cjk_dec_chunk = llexternal('pypy_cjk_dec_chunk', [DECODEBUF_P],
@@ -113,25 +117,33 @@
rffi.SSIZE_T)
def decode(codec, stringdata, errors="strict", errorcb=None, namecb=None):
+ decodebuf = pypy_cjk_dec_new(codec)
+ if not decodebuf:
+ raise MemoryError
+ try:
+ return decodeex(decodebuf, stringdata, errors, errorcb, namecb)
+ finally:
+ pypy_cjk_dec_free(decodebuf)
+
+def decodeex(decodebuf, stringdata, errors="strict", errorcb=None, namecb=None,
+ incompletepos=None):
inleft = len(stringdata)
inbuf = rffi.get_nonmovingbuffer(stringdata)
try:
- decodebuf = pypy_cjk_dec_init(codec, inbuf, inleft)
- if not decodebuf:
+ if pypy_cjk_dec_init(decodebuf, inbuf, inleft) < 0:
raise MemoryError
- try:
- while True:
- r = pypy_cjk_dec_chunk(decodebuf)
- if r == 0:
- break
- multibytecodec_decerror(decodebuf, r, errors,
- errorcb, namecb, stringdata)
- src = pypy_cjk_dec_outbuf(decodebuf)
- length = pypy_cjk_dec_outlen(decodebuf)
- return rffi.wcharpsize2unicode(src, length)
- #
- finally:
- pypy_cjk_dec_free(decodebuf)
+ while True:
+ r = pypy_cjk_dec_chunk(decodebuf)
+ if r == 0:
+ break
+ if incompletepos is not None and r == MBERR_TOOFEW:
+ incompletepos[0] = pypy_cjk_dec_inbuf_consumed(decodebuf)
+ break
+ multibytecodec_decerror(decodebuf, r, errors,
+ errorcb, namecb, stringdata)
+ src = pypy_cjk_dec_outbuf(decodebuf)
+ length = pypy_cjk_dec_outlen(decodebuf)
+ return rffi.wcharpsize2unicode(src, length)
#
finally:
rffi.free_nonmovingbuffer(stringdata, inbuf)
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py
b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -2,6 +2,7 @@
from pypy.module._multibytecodec.c_codecs import getcodec, codecs
from pypy.module._multibytecodec.c_codecs import decode, encode
from pypy.module._multibytecodec.c_codecs import EncodeDecodeError
+from pypy.module._multibytecodec import c_codecs
def test_codecs_existence():
@@ -22,6 +23,51 @@
c = getcodec("hz")
u = decode(c, "~{abc}")
assert u == u'\u5f95\u6cef'
+ u = decode(c, "~{")
+ assert u == u''
+
+def test_decodeex_hz():
+ c = getcodec("hz")
+ decodebuf = c_codecs.pypy_cjk_dec_new(c)
+ u = c_codecs.decodeex(decodebuf, "~{abcd~}")
+ assert u == u'\u5f95\u6c85'
+ u = c_codecs.decodeex(decodebuf, "~{efgh~}")
+ assert u == u'\u5f50\u73b7'
+ u = c_codecs.decodeex(decodebuf, "!~{abcd~}xyz~{efgh")
+ assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7'
+ c_codecs.pypy_cjk_dec_free(decodebuf)
+
+def test_decodeex_hz_incomplete():
+ c = getcodec("hz")
+ decodebuf = c_codecs.pypy_cjk_dec_new(c)
+ buf = ''
+ for c, output in zip("!~{abcd~}xyz~{efgh",
+ [u'!', # !
+ u'', # ~
+ u'', # {
+ u'', # a
+ u'\u5f95', # b
+ u'', # c
+ u'\u6c85', # d
+ u'', # ~
+ u'', # }
+ u'x', # x
+ u'y', # y
+ u'z', # z
+ u'', # ~
+ u'', # {
+ u'', # e
+ u'\u5f50', # f
+ u'', # g
+ u'\u73b7', # h
+ ]):
+ buf += c
+ incompletepos = [len(buf)]
+ u = c_codecs.decodeex(decodebuf, buf, incompletepos=incompletepos)
+ assert u == output
+ buf = buf[incompletepos[0]:]
+ assert buf == ''
+ c_codecs.pypy_cjk_dec_free(decodebuf)
def test_decode_hz_error():
# error
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c
b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
@@ -3,31 +3,38 @@
#include "src/cjkcodecs/multibytecodec.h"
-struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec,
- char *inbuf, Py_ssize_t inlen)
+struct pypy_cjk_dec_s *pypy_cjk_dec_new(const MultibyteCodec *codec)
{
struct pypy_cjk_dec_s *d = malloc(sizeof(struct pypy_cjk_dec_s));
if (!d)
return NULL;
if (codec->decinit != NULL && codec->decinit(&d->state, codec->config) != 0)
- goto errorexit;
+ {
+ free(d);
+ return NULL;
+ }
+ d->codec = codec;
+ d->outbuf_start = NULL;
+ return d;
+}
- d->codec = codec;
+Py_ssize_t pypy_cjk_dec_init(struct pypy_cjk_dec_s *d,
+ char *inbuf, Py_ssize_t inlen)
+{
d->inbuf_start = inbuf;
d->inbuf = inbuf;
d->inbuf_end = inbuf + inlen;
- d->outbuf_start = (inlen <= (PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) ?
- malloc(inlen * sizeof(Py_UNICODE)) :
- NULL);
- if (!d->outbuf_start)
- goto errorexit;
+ if (d->outbuf_start == NULL)
+ {
+ d->outbuf_start = (inlen <= (PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) ?
+ malloc(inlen * sizeof(Py_UNICODE)) :
+ NULL);
+ if (d->outbuf_start == NULL)
+ return -1;
+ d->outbuf_end = d->outbuf_start + inlen;
+ }
d->outbuf = d->outbuf_start;
- d->outbuf_end = d->outbuf_start + inlen;
- return d;
-
- errorexit:
- free(d);
- return NULL;
+ return 0;
}
void pypy_cjk_dec_free(struct pypy_cjk_dec_s *d)
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h
b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
@@ -94,8 +94,9 @@
Py_UNICODE *outbuf_start, *outbuf, *outbuf_end;
};
-struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec,
- char *inbuf, Py_ssize_t inlen);
+struct pypy_cjk_dec_s *pypy_cjk_dec_new(const MultibyteCodec *codec);
+Py_ssize_t pypy_cjk_dec_init(struct pypy_cjk_dec_s *d,
+ char *inbuf, Py_ssize_t inlen);
void pypy_cjk_dec_free(struct pypy_cjk_dec_s *);
Py_ssize_t pypy_cjk_dec_chunk(struct pypy_cjk_dec_s *);
Py_UNICODE *pypy_cjk_dec_outbuf(struct pypy_cjk_dec_s *);
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit