Author: Armin Rigo <[email protected]>
Branch:
Changeset: r46145:8949a315da3f
Date: 2011-07-31 18:34 +0200
http://bitbucket.org/pypy/pypy/changeset/8949a315da3f/
Log: MultibyteIncrementalDecoder.
diff --git a/pypy/module/_multibytecodec/__init__.py
b/pypy/module/_multibytecodec/__init__.py
--- a/pypy/module/_multibytecodec/__init__.py
+++ b/pypy/module/_multibytecodec/__init__.py
@@ -7,13 +7,14 @@
# for compatibility this name is obscured, and should be called
# via the _codecs_*.py modules written in lib_pypy.
'__getcodec': 'interp_multibytecodec.getcodec',
+
+ 'MultibyteIncrementalDecoder':
+ 'interp_incremental.MultibyteIncrementalDecoder',
}
appleveldefs = {
'MultibyteIncrementalEncoder':
'app_multibytecodec.MultibyteIncrementalEncoder',
- 'MultibyteIncrementalDecoder':
- 'app_multibytecodec.MultibyteIncrementalDecoder',
'MultibyteStreamReader':
'app_multibytecodec.MultibyteStreamReader',
'MultibyteStreamWriter':
diff --git a/pypy/module/_multibytecodec/interp_incremental.py
b/pypy/module/_multibytecodec/interp_incremental.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/interp_incremental.py
@@ -0,0 +1,80 @@
+from pypy.rpython.lltypesystem import lltype
+from pypy.module._multibytecodec import c_codecs
+from pypy.module._multibytecodec.interp_multibytecodec import (
+ MultibyteCodec, wrap_unicodedecodeerror, wrap_runtimeerror)
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.interpreter.typedef import TypeDef
+from pypy.module._codecs.interp_codecs import CodecState
+
+
+class MultibyteIncrementalDecoder(Wrappable):
+    """Incremental decoder built on top of a MultibyteCodec.
+
+    The codec is read from the "codec" attribute of the wrapped instance,
+    so the type is meant to be subclassed with a "codec" class attribute.
+    Input located after the position reported by
+    c_codecs.pypy_cjk_dec_inbuf_consumed() is kept in self.pending and
+    prepended to the input of the next decode_w() call.
+    """
+
+    def __init__(self, space, errors):
+        """Initialize the decoder.
+
+        errors -- name of the error handling scheme; None selects 'strict'.
+        """
+        if errors is None:
+            errors = 'strict'
+        self.space = space
+        self.errors = errors
+        # look up "codec" on the wrapped instance; interp_w() checks that
+        # it really is a MultibyteCodec
+        w_codec = space.getattr(space.wrap(self), space.wrap("codec"))
+        codec = space.interp_w(MultibyteCodec, w_codec)
+        self.codec = codec.codec
+        self.name = codec.name
+        self._initialize()
+
+    def _initialize(self):
+        """Allocate a new decode buffer and start with no pending input."""
+        self.decodebuf = c_codecs.pypy_cjk_dec_new(self.codec)
+        self.pending = ""
+
+    def _free(self):
+        """Free the decode buffer if there is one.
+
+        The pointer is reset to null afterwards, so a second call skips
+        the free.
+        """
+        self.pending = None
+        if self.decodebuf:
+            # This module imports only the c_codecs module itself, so the
+            # free function and the pointer type must be qualified; the
+            # unqualified names are not defined here.
+            c_codecs.pypy_cjk_dec_free(self.decodebuf)
+            self.decodebuf = lltype.nullptr(c_codecs.DECODEBUF_P.TO)
+
+    def __del__(self):
+        self._free()
+
+    def reset_w(self):
+        """Discard the current buffer and pending input, then start over."""
+        self._free()
+        self._initialize()
+
+    @unwrap_spec(object=str, final=bool)
+    def decode_w(self, object, final=False):
+        """Decode the string 'object' and return the wrapped output.
+
+        Pending input from the previous call is prepended first.  The
+        value of get_ignore_error(final) is passed to c_codecs.decodeex().
+        EncodeDecodeError and RuntimeError from the decoder are re-raised
+        through wrap_unicodedecodeerror() and wrap_runtimeerror().
+        """
+        space = self.space
+        state = space.fromcache(CodecState)
+        if len(self.pending) > 0:
+            object = self.pending + object
+        try:
+            output = c_codecs.decodeex(self.decodebuf, object, self.errors,
+                                       state.decode_error_handler, self.name,
+                                       get_ignore_error(final))
+        except c_codecs.EncodeDecodeError, e:
+            raise wrap_unicodedecodeerror(space, e, object, self.name)
+        except RuntimeError:
+            raise wrap_runtimeerror(space)
+        # keep whatever lies past the consumed position for the next call
+        pos = c_codecs.pypy_cjk_dec_inbuf_consumed(self.decodebuf)
+        assert 0 <= pos <= len(object)
+        self.pending = object[pos:]
+        return space.wrap(output)
+
+
+@unwrap_spec(errors="str_or_None")
+def mbidecoder_new(space, w_subtype, errors=None):
+    """Allocate an instance for w_subtype, run __init__(space, errors)
+    on it and return it wrapped."""
+    decoder = space.allocate_instance(MultibyteIncrementalDecoder, w_subtype)
+    decoder.__init__(space, errors)
+    return space.wrap(decoder)
+
+# Type definition: instances are created through mbidecoder_new(), and
+# decode_w() / reset_w() are exposed under the names decode / reset.
+MultibyteIncrementalDecoder.typedef = TypeDef(
+    'MultibyteIncrementalDecoder',
+    __module__='_multibytecodec',
+    __new__=interp2app(mbidecoder_new),
+    decode=interp2app(MultibyteIncrementalDecoder.decode_w),
+    reset=interp2app(MultibyteIncrementalDecoder.reset_w),
+)
+
+
+def get_ignore_error(final):
+    """Return the ignore-error value handed to c_codecs.decodeex().
+
+    When 'final' is false this is c_codecs.MBERR_TOOFEW; when it is true
+    the result is 0, i.e. no error is ignored.
+    """
+    if not final:
+        return c_codecs.MBERR_TOOFEW
+    return 0    # don't ignore any error
diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py
b/pypy/module/_multibytecodec/interp_multibytecodec.py
--- a/pypy/module/_multibytecodec/interp_multibytecodec.py
+++ b/pypy/module/_multibytecodec/interp_multibytecodec.py
@@ -22,17 +22,9 @@
output = c_codecs.decode(self.codec, input, errors,
state.decode_error_handler, self.name)
except c_codecs.EncodeDecodeError, e:
- raise OperationError(
- space.w_UnicodeDecodeError,
- space.newtuple([
- space.wrap(self.name),
- space.wrap(input),
- space.wrap(e.start),
- space.wrap(e.end),
- space.wrap(e.reason)]))
+ raise wrap_unicodedecodeerror(space, e, input, self.name)
except RuntimeError:
- raise OperationError(space.w_RuntimeError,
- space.wrap("internal codec error"))
+ raise wrap_runtimeerror(space)
return space.newtuple([space.wrap(output),
space.wrap(len(input))])
@@ -46,17 +38,9 @@
output = c_codecs.encode(self.codec, input, errors,
state.encode_error_handler, self.name)
except c_codecs.EncodeDecodeError, e:
- raise OperationError(
- space.w_UnicodeEncodeError,
- space.newtuple([
- space.wrap(self.name),
- space.wrap(input),
- space.wrap(e.start),
- space.wrap(e.end),
- space.wrap(e.reason)]))
+ raise wrap_unicodeencodeerror(space, e, input, self.name)
except RuntimeError:
- raise OperationError(space.w_RuntimeError,
- space.wrap("internal codec error"))
+ raise wrap_runtimeerror(space)
return space.newtuple([space.wrap(output),
space.wrap(len(input))])
@@ -78,3 +62,28 @@
raise OperationError(space.w_LookupError,
space.wrap("no such codec is supported."))
return space.wrap(MultibyteCodec(name, codec))
+
+
+def wrap_unicodedecodeerror(space, e, input, name):
+    """Build and return (not raise) an OperationError wrapping
+    UnicodeDecodeError with the arguments
+    (name, input, e.start, e.end, e.reason)."""
+    w_args = space.newtuple([
+        space.wrap(name),
+        space.wrap(input),
+        space.wrap(e.start),
+        space.wrap(e.end),
+        space.wrap(e.reason),
+    ])
+    return OperationError(space.w_UnicodeDecodeError, w_args)
+
+def wrap_unicodeencodeerror(space, e, input, name):
+    """Build and return an OperationError wrapping UnicodeEncodeError
+    with the arguments (name, input, e.start, e.end, e.reason).
+
+    The error is returned rather than raised here: the caller writes
+    'raise wrap_unicodeencodeerror(...)', the same way
+    wrap_unicodedecodeerror() is used.
+    """
+    return OperationError(
+        space.w_UnicodeEncodeError,
+        space.newtuple([
+            space.wrap(name),
+            space.wrap(input),
+            space.wrap(e.start),
+            space.wrap(e.end),
+            space.wrap(e.reason)]))
+
+def wrap_runtimeerror(space):
+    """Build and return an OperationError wrapping RuntimeError with the
+    message "internal codec error".
+
+    The error is returned rather than raised here: callers write
+    'raise wrap_runtimeerror(space)'.
+    """
+    return OperationError(space.w_RuntimeError,
+                          space.wrap("internal codec error"))
diff --git a/pypy/module/_multibytecodec/test/test_app_incremental.py
b/pypy/module/_multibytecodec/test/test_app_incremental.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/test/test_app_incremental.py
@@ -0,0 +1,51 @@
+from pypy.conftest import gettestobjspace
+
+
+class AppTestClasses:
+ # Tests for MultibyteIncrementalDecoder, run against an object space
+ # that has the '_multibytecodec' module enabled.
+ def setup_class(cls):
+ # Build a subclass of MultibyteIncrementalDecoder whose "codec"
+ # attribute is the 'hz' codec; the tests use it as
+ # self.IncrementalHzDecoder.
+ cls.space = gettestobjspace(usemodules=['_multibytecodec'])
+ cls.w_IncrementalHzDecoder = cls.space.appexec([], """():
+ import _codecs_cn
+ from _multibytecodec import MultibyteIncrementalDecoder
+
+ class IncrementalHzDecoder(MultibyteIncrementalDecoder):
+ codec = _codecs_cn.getcodec('hz')
+
+ return IncrementalHzDecoder
+ """)
+
+ def test_decode_hz(self):
+ # First decode two complete "~{...~}" sequences in one call each,
+ # then feed a string one character at a time and compare every
+ # partial result with the expected output listed next to it.
+ d = self.IncrementalHzDecoder()
+ r = d.decode("~{abcd~}")
+ assert r == u'\u5f95\u6c85'
+ r = d.decode("~{efgh~}")
+ assert r == u'\u5f50\u73b7'
+ for c, output in zip("!~{abcd~}xyz~{efgh",
+ [u'!', # !
+ u'', # ~
+ u'', # {
+ u'', # a
+ u'\u5f95', # b
+ u'', # c
+ u'\u6c85', # d
+ u'', # ~
+ u'', # }
+ u'x', # x
+ u'y', # y
+ u'z', # z
+ u'', # ~
+ u'', # {
+ u'', # e
+ u'\u5f50', # f
+ u'', # g
+ u'\u73b7', # h
+ ]):
+ r = d.decode(c)
+ assert r == output
+
+ def test_decode_hz_final(self):
+ # With final=True, "~{" alone decodes to u'' while "~" and "~{a"
+ # must raise UnicodeDecodeError.
+ d = self.IncrementalHzDecoder()
+ r = d.decode("~{", True)
+ assert r == u''
+ raises(UnicodeDecodeError, d.decode, "~", True)
+ raises(UnicodeDecodeError, d.decode, "~{a", True)
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit