Author: fijal Branch: unicode-utf8 Changeset: r92613:15eb01ac7f57 Date: 2017-10-05 17:14 +0200 http://bitbucket.org/pypy/pypy/changeset/15eb01ac7f57/
Log: whack whack whack; diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -126,7 +126,7 @@ self.orig = handler def handle(self, errors, encoding, msg, s, pos, endpos): - s, p, lgt = self.orig(errors, encoding, msg, s, pos, endpos) + s, p = self.orig(errors, encoding, msg, s, pos, endpos) return s.decode("utf8"), p class EncodeWrapper(object): @@ -134,8 +134,7 @@ self.orig = handler def handle(self, errors, encoding, msg, s, pos, endpos): - s, rs, p, lgt = self.orig(errors, encoding, msg, s.encode("utf8"), pos, endpos) - return s, rs, p + return self.orig(errors, encoding, msg, s.encode("utf8"), pos, endpos) # some irregular interfaces def str_decode_utf8(s, slen, errors, final, errorhandler): diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -66,7 +66,7 @@ "position %d from error handler out of bounds", newpos) w_replace = space.convert_to_w_unicode(w_replace) - return w_replace._utf8, newpos, w_replace._length + return w_replace._utf8, newpos return call_errorhandler def make_decode_errorhandler(self, space): @@ -443,8 +443,7 @@ # "allow_surrogates=True" @unwrap_spec(utf8='utf8', errors='text_or_none') def utf_8_encode(space, utf8, errors="strict"): - raise Exception('foo') - return space.newtuple([space.newbytes(utf8), space.newint(utf8len)]) + return space.newtuple([space.newbytes(utf8), space.newint(rutf8.check_utf8(utf8))]) #@unwrap_spec(uni=unicode, errors='text_or_none') #def utf_8_encode(space, uni, errors="strict"): # if errors is None: diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -776,31 +776,28 @@ def encode_object(space, w_object, encoding, errors): + w_encoder = None if encoding is None: # Get the encoder functions as a wrapped object. # This lookup is cached. w_encoder = space.sys.get_w_default_encoder() - else: - if errors is None or errors == 'strict': - if encoding == 'ascii': - s = space.utf8_w(w_object) - try: - rutf8.check_ascii(s) - except rutf8.CheckError as a: - eh = unicodehelper.encode_error_handler(space) - u_len = w_object._len() - eh(None, "ascii", "ordinal not in range(128)", s, u_len, - a.pos, a.pos + 1) - assert False, "always raises" - return space.newbytes(s) - if encoding == 'utf-8': - u = space.utf8_w(w_object) - return space.newbytes(u) - # XXX is this enough? - #eh = unicodehelper.raise_unicode_exception_encode - #return space.newbytes(unicode_encode_utf_8( - # u, len(u), None, errorhandler=eh, - # allow_surrogates=True)) + if errors is None or errors == 'strict': + if ((encoding is None and space.sys.defaultencoding == 'ascii') or + encoding == 'ascii'): + s = space.utf8_w(w_object) + try: + rutf8.check_ascii(s) + except rutf8.CheckError as a: + eh = unicodehelper.encode_error_handler(space) + u_len = w_object._len() + eh(None, "ascii", "ordinal not in range(128)", s, u_len, + a.pos, a.pos + 1) + assert False, "always raises" + return space.newbytes(s) + if ((encoding is None and space.sys.defaultencoding == 'utf8') or + encoding == 'utf-8'): + return space.newbytes(space.utf8_w(w_object)) + if w_encoder is None: from pypy.module._codecs.interp_codecs import lookup_codec w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0)) if errors is None: @@ -821,7 +818,6 @@ encoding = getdefaultencoding(space) if errors is None or errors == 'strict': if encoding == 'ascii': - # XXX error handling s = space.charbuf_w(w_obj) try: rutf8.check_ascii(s) diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -1280,8 +1280,9 @@ collend = pos+1 while collend < len(p) and ord(p[collend]) >= limit: collend += 1 - ru, rs, pos = errorhandler(errors, encoding, reason, p, + ru, pos = errorhandler(errors, encoding, reason, p, collstart, collend) + rs = None if rs is not None: # py3k only result.append(rs) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit