[pypy-commit] pypy unicode-utf8: whack whack whack;

fijal Sat, 07 Oct 2017 06:11:52 -0700

Author: fijal
Branch: unicode-utf8
Changeset: r92613:15eb01ac7f57
Date: 2017-10-05 17:14 +0200
http://bitbucket.org/pypy/pypy/changeset/15eb01ac7f57/


Log:    whack whack whack;

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -126,7 +126,7 @@
         self.orig = handler
 
     def handle(self, errors, encoding, msg, s, pos, endpos):
-        s, p, lgt = self.orig(errors, encoding, msg, s, pos, endpos)
+        s, p = self.orig(errors, encoding, msg, s, pos, endpos)
         return s.decode("utf8"), p
 
 class EncodeWrapper(object):
@@ -134,8 +134,7 @@
         self.orig = handler
 
     def handle(self, errors, encoding, msg, s, pos, endpos):
-        s, rs, p, lgt = self.orig(errors, encoding, msg, s.encode("utf8"), pos, endpos)
-        return s, rs, p
+        return self.orig(errors, encoding, msg, s.encode("utf8"), pos, endpos)
 
 # some irregular interfaces
 def str_decode_utf8(s, slen, errors, final, errorhandler):
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -66,7 +66,7 @@
                             "position %d from error handler out of bounds",
                             newpos)
             w_replace = space.convert_to_w_unicode(w_replace)
-            return w_replace._utf8, newpos, w_replace._length
+            return w_replace._utf8, newpos
         return call_errorhandler
 
     def make_decode_errorhandler(self, space):
@@ -443,8 +443,7 @@
 # "allow_surrogates=True"
 @unwrap_spec(utf8='utf8', errors='text_or_none')
 def utf_8_encode(space, utf8, errors="strict"):
-    raise Exception('foo')
-    return space.newtuple([space.newbytes(utf8), space.newint(utf8len)])
+    return space.newtuple([space.newbytes(utf8), space.newint(rutf8.check_utf8(utf8))])
 #@unwrap_spec(uni=unicode, errors='text_or_none')
 #def utf_8_encode(space, uni, errors="strict"):
 #    if errors is None:
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -776,31 +776,28 @@
 
 
 def encode_object(space, w_object, encoding, errors):
+    w_encoder = None
     if encoding is None:
         # Get the encoder functions as a wrapped object.
         # This lookup is cached.
         w_encoder = space.sys.get_w_default_encoder()
-    else:
-        if errors is None or errors == 'strict':
-            if encoding == 'ascii':
-                s = space.utf8_w(w_object)
-                try:
-                    rutf8.check_ascii(s)
-                except rutf8.CheckError as a:
-                    eh = unicodehelper.encode_error_handler(space)
-                    u_len = w_object._len()
-                    eh(None, "ascii", "ordinal not in range(128)", s, u_len,
-                        a.pos, a.pos + 1)
-                    assert False, "always raises"
-                return space.newbytes(s)
-            if encoding == 'utf-8':
-                u = space.utf8_w(w_object)
-                return space.newbytes(u)
-                # XXX is this enough?
-                #eh = unicodehelper.raise_unicode_exception_encode
-                #return space.newbytes(unicode_encode_utf_8(
-                #        u, len(u), None, errorhandler=eh,
-                #        allow_surrogates=True))
+    if errors is None or errors == 'strict':
+        if ((encoding is None and space.sys.defaultencoding == 'ascii') or
+             encoding == 'ascii'):
+            s = space.utf8_w(w_object)
+            try:
+                rutf8.check_ascii(s)
+            except rutf8.CheckError as a:
+                eh = unicodehelper.encode_error_handler(space)
+                u_len = w_object._len()
+                eh(None, "ascii", "ordinal not in range(128)", s, u_len,
+                    a.pos, a.pos + 1)
+                assert False, "always raises"
+            return space.newbytes(s)
+        if ((encoding is None and space.sys.defaultencoding == 'utf8') or
+             encoding == 'utf-8'):
+            return space.newbytes(space.utf8_w(w_object))
+    if w_encoder is None:
         from pypy.module._codecs.interp_codecs import lookup_codec
         w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0))
     if errors is None:
@@ -821,7 +818,6 @@
         encoding = getdefaultencoding(space)
     if errors is None or errors == 'strict':
         if encoding == 'ascii':
-            # XXX error handling
             s = space.charbuf_w(w_obj)
             try:
                 rutf8.check_ascii(s)
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1280,8 +1280,9 @@
             collend = pos+1
             while collend < len(p) and ord(p[collend]) >= limit:
                 collend += 1
-            ru, rs, pos = errorhandler(errors, encoding, reason, p,
+            ru, pos = errorhandler(errors, encoding, reason, p,
                                        collstart, collend)
+            rs = None
             if rs is not None:
                 # py3k only
                 result.append(rs)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8: whack whack whack;

Reply via email to