Author: Armin Rigo <[email protected]>
Branch: 
Changeset: r86765:728a9942eb3e
Date: 2016-08-31 10:25 +0200
http://bitbucket.org/pypy/pypy/changeset/728a9942eb3e/

Log:    Issue #2386: non-latin1 unicode keys were ignored in
        'unicode.format(**d)'

diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py
--- a/pypy/objspace/std/newformat.py
+++ b/pypy/objspace/std/newformat.py
@@ -8,6 +8,7 @@
 from rpython.rlib.objectmodel import specialize
 from rpython.rlib.rfloat import copysign, formatd
 from rpython.rlib.rarithmetic import r_uint, intmask
+from pypy.interpreter.signature import Signature
 
 
 @specialize.argtype(1)
@@ -40,6 +41,9 @@
 ANS_MANUAL = 3
 
 
+format_signature = Signature([], 'args', 'kwargs')
+
+
 def make_template_formatting_class(for_unicode):
     class TemplateFormatter(object):
         is_unicode = for_unicode
@@ -52,7 +56,17 @@
             self.template = template
 
         def build(self, args):
-            self.args, self.kwargs = args.unpack()
+            if self.is_unicode:
+                # for unicode, use the slower parse_obj() to get self.w_kwargs
+                # as a wrapped dictionary that may contain full-range unicode
+                # keys.  See test_non_latin1_key
+                space = self.space
+                w_args, w_kwds = args.parse_obj(None, 'format',
+                                                format_signature)
+                self.args = space.listview(w_args)
+                self.w_kwargs = w_kwds
+            else:
+                self.args, self.kwargs = args.unpack()
             self.auto_numbering = 0
             self.auto_numbering_state = ANS_INIT
             return self._build_string(0, len(self.template), 2)
@@ -197,17 +211,13 @@
             if index == -1:
                 kwarg = name[:i]
                 if self.is_unicode:
+                    w_arg = space.getitem(self.w_kwargs, space.wrap(kwarg))
+                else:
                     try:
-                        arg_key = kwarg.encode("latin-1")
-                    except UnicodeEncodeError:
-                        # Not going to be found in a dict of strings.
-                        raise OperationError(space.w_KeyError, space.wrap(kwarg))
-                else:
-                    arg_key = kwarg
-                try:
-                    w_arg = self.kwargs[arg_key]
-                except KeyError:
-                    raise OperationError(space.w_KeyError, space.wrap(arg_key))
+                        w_arg = self.kwargs[kwarg]
+                    except KeyError:
+                        raise OperationError(space.w_KeyError,
+                                             space.wrap(kwarg))
             else:
                 try:
                     w_arg = self.args[index]
diff --git a/pypy/objspace/std/test/test_newformat.py b/pypy/objspace/std/test/test_newformat.py
--- a/pypy/objspace/std/test/test_newformat.py
+++ b/pypy/objspace/std/test/test_newformat.py
@@ -215,7 +215,9 @@
         assert self.s("{!r}").format(x()) == self.s("32")
 
     def test_non_latin1_key(self):
-        raises(KeyError, self.s("{\u1000}").format)
+        raises(KeyError, u"{\u1000}".format)
+        d = {u"\u1000": u"foo"}
+        assert u"{\u1000}".format(**d) == u"foo"
 
 
 class AppTestStringFormat(BaseStringFormatTests):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to