[issue1303] adapt str8 constructor to bytes constructor

Georg Brandl Fri, 19 Oct 2007 15:45:00 -0700

New submission from Georg Brandl:

This makes the str8 constructor accept the same kinds of types as the
bytes constructor. I had to change instances of str8("abc") to
str8(b"abc") to make the tests pass again. The only remaining failure
should be test_str -- the string test suite must be thoroughly redesigned
to fit all three string-like types.


----------
assignee: gvanrossum
components: Interpreter Core
files: str_constructor.diff
keywords: py3k
messages: 56572
nosy: georg.brandl, gvanrossum
severity: normal
status: open
title: adapt str8 constructor to bytes constructor
versions: Python 3.0
Added file: http://bugs.python.org/file8570/str_constructor.diff

__________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1303>
__________________________________

Index: Objects/stringobject.c
===================================================================
--- Objects/stringobject.c      (Revision 58552)
+++ Objects/stringobject.c      (Arbeitskopie)
@@ -3020,16 +3020,149 @@
 static PyObject *
 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
-       PyObject *x = NULL;
-       static char *kwlist[] = {"object", 0};
+       PyObject *x = NULL, *it;
+       PyObject *(*iternext)(PyObject *);
+       const char *encoding = NULL;
+       const char *errors = NULL;
+       PyObject *new = NULL;
+       Py_ssize_t i, size;
+       static char *kwlist[] = {"object", "encoding", "errors", 0};
 
        if (type != &PyString_Type)
                return str_subtype_new(type, args, kwds);
-       if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str8", kwlist, &x))
+       if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str8", kwlist, &x,
+                                        &encoding, &errors))
                return NULL;
-       if (x == NULL)
+       if (x == NULL) {
+               if (encoding != NULL || errors != NULL) {
+                       PyErr_SetString(PyExc_TypeError,
+                                       "encoding or errors without sequence "
+                                       "argument");
+                       return NULL;
+               }
                return PyString_FromString("");
-       return PyObject_Str(x);
+       }
+
+       if (PyUnicode_Check(x)) {
+               /* Encode via the codec registry */
+               if (encoding == NULL) {
+                       PyErr_SetString(PyExc_TypeError,
+                                       "string argument without an encoding");
+                       return NULL;
+               }
+               new = PyCodec_Encode(x, encoding, errors);
+               if (new == NULL)
+                       return NULL;
+               /* XXX(gb): must accept bytes here since codecs output bytes
+                  at the moment */
+               if (PyBytes_Check(new)) {
+                       PyObject *str;
+                       str = PyString_FromString(PyBytes_AsString(new));
+                       Py_DECREF(new);
+                       if (!str)
+                               return NULL;
+                       return str;
+               }
+               if (!PyString_Check(new)) {
+                       PyErr_Format(PyExc_TypeError,
+                                    "encoder did not return a str8 "
+                                    "object (type=%.400s)",
+                                    Py_Type(new)->tp_name);
+                       Py_DECREF(new);
+                       return NULL;
+               }
+               return new;
+       }
+
+       /* If it's not unicode, there can't be encoding or errors */
+       if (encoding != NULL || errors != NULL) {
+               PyErr_SetString(PyExc_TypeError,
+                               "encoding or errors without a string argument");
+               return NULL;
+       }
+
+       /* Use the modern buffer interface */
+       if (PyObject_CheckBuffer(x)) {
+               Py_buffer view;
+               if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
+                       return NULL;
+               new = PyString_FromStringAndSize(NULL, view.len);
+               if (!new)
+                       goto fail;
+               if (PyBuffer_ToContiguous(((PyStringObject *)new)->ob_sval,
+                                         &view, view.len, 'C') < 0)
+                       goto fail;
+               PyObject_ReleaseBuffer(x, &view);
+               return new;
+         fail:
+               Py_XDECREF(new);
+               PyObject_ReleaseBuffer(x, &view);
+               return NULL;
+       }
+
+       /* For the iterator version, create a string object and resize as needed. */
+       /* XXX(gb): is 64 a good value? also, optimize this if length is known */
+       size = 64;
+       new = PyString_FromStringAndSize(NULL, size);
+       if (new == NULL)
+               return NULL;
+
+       /* XXX Optimize this if the arguments is a list, tuple */
+
+       /* Get the iterator */
+       it = PyObject_GetIter(x);
+       if (it == NULL)
+               goto error;
+       iternext = *Py_Type(it)->tp_iternext;
+
+       /* Run the iterator to exhaustion */
+       for (i = 0; ; i++) {
+               PyObject *item;
+               Py_ssize_t value;
+
+               /* Get the next item */
+               item = iternext(it);
+               if (item == NULL) {
+                       if (PyErr_Occurred()) {
+                               if (!PyErr_ExceptionMatches(PyExc_StopIteration))
+                                       goto error;
+                               PyErr_Clear();
+                       }
+                       break;
+               }
+
+               /* Interpret it as an int (__index__) */
+               value = PyNumber_AsSsize_t(item, PyExc_ValueError);
+               Py_DECREF(item);
+               if (value == -1 && PyErr_Occurred())
+                       goto error;
+
+               /* Range check */
+               if (value < 0 || value >= 256) {
+                       PyErr_SetString(PyExc_ValueError,
+                                       "bytes must be in range(0, 256)");
+                       goto error;
+               }
+
+               /* Append the byte */
+               if (i >= size) {
+                       size *= 2;
+                       if (_PyString_Resize(&new, size) < 0)
+                               goto error;
+               }
+               ((PyStringObject *)new)->ob_sval[i] = value;
+       }
+       _PyString_Resize(&new, i);
+
+       /* Clean up and return success */
+       Py_DECREF(it);
+       return new;
+
+  error:
+       /* Error handling when it != NULL */
+       Py_XDECREF(it);
+       Py_DECREF(new);
+       return NULL;
 }
 
 static PyObject *
Index: Lib/struct.py
===================================================================
--- Lib/struct.py       (Revision 58552)
+++ Lib/struct.py       (Arbeitskopie)
@@ -36,7 +36,7 @@
 class Struct(_Struct):
     def __init__(self, fmt):
         if isinstance(fmt, str):
-            fmt = str8(fmt)
+            fmt = str8(fmt, 'latin1')
         _Struct.__init__(self, fmt)
 
 _MAXCACHE = 100
Index: Lib/pickletools.py
===================================================================
--- Lib/pickletools.py  (Revision 58552)
+++ Lib/pickletools.py  (Arbeitskopie)
@@ -1978,7 +1978,7 @@
 
 _dis_test = r"""
 >>> import pickle
->>> x = [1, 2, (3, 4), {str8('abc'): "def"}]
+>>> x = [1, 2, (3, 4), {str8(b'abc'): "def"}]
 >>> pkl = pickle.dumps(x, 0)
 >>> dis(pkl)
     0: (    MARK
Index: Lib/test/test_io.py
===================================================================
--- Lib/test/test_io.py (Revision 58552)
+++ Lib/test/test_io.py (Arbeitskopie)
@@ -88,7 +88,7 @@
         self.assertEqual(f.tell(), 6)
         self.assertEqual(f.seek(-1, 1), 5)
         self.assertEqual(f.tell(), 5)
-        self.assertEqual(f.write(str8(" world\n\n\n")), 9)
+        self.assertEqual(f.write(str8(b" world\n\n\n")), 9)
         self.assertEqual(f.seek(0), 0)
         self.assertEqual(f.write(b"h"), 1)
         self.assertEqual(f.seek(-1, 2), 13)
Index: Lib/test/test_codeccallbacks.py
===================================================================
--- Lib/test/test_codeccallbacks.py     (Revision 58552)
+++ Lib/test/test_codeccallbacks.py     (Arbeitskopie)
@@ -181,7 +181,7 @@
         # mapped through the encoding again. This means, that
         # to be able to use e.g. the "replace" handler, the
         # charmap has to have a mapping for "?".
-        charmap = dict((ord(c), str8(2*c.upper())) for c in "abcdefgh")
+        charmap = dict((ord(c), str8(2*c.upper(), 'ascii')) for c in "abcdefgh")
         sin = "abc"
         sout = b"AABBCC"
         self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
@@ -189,7 +189,7 @@
         sin = "abcA"
         self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
 
-        charmap[ord("?")] = str8("XYZ")
+        charmap[ord("?")] = str8(b"XYZ")
         sin = "abcDEF"
         sout = b"AABBCCXYZXYZXYZ"
         self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
@@ -309,7 +309,7 @@
         # check with one argument too much
         self.assertRaises(TypeError, exctype, *(args + ["too much"]))
         # check with one argument of the wrong type
-        wrongargs = [ "spam", str8("eggs"), b"spam", 42, 1.0, None ]
+        wrongargs = [ "spam", str8(b"eggs"), b"spam", 42, 1.0, None ]
         for i in range(len(args)):
             for wrongarg in wrongargs:
                 if type(wrongarg) is type(args[i]):
Index: Lib/test/test_locale.py
===================================================================
--- Lib/test/test_locale.py     (Revision 58552)
+++ Lib/test/test_locale.py     (Arbeitskopie)
@@ -82,7 +82,7 @@
 
 # Test BSD Rune locale's bug for isctype functions.
 def teststrop(s, method, output):
-    s = str8(s)
+    s = str8(s, 'latin1') # XXX
     if verbose:
         print("%s.%s() =? %s ..." % (repr(s), method, repr(output)), end=' ')
     result = getattr(s, method)()
Index: Lib/test/testcodec.py
===================================================================
--- Lib/test/testcodec.py       (Revision 58552)
+++ Lib/test/testcodec.py       (Arbeitskopie)
@@ -36,7 +36,7 @@
 decoding_map = codecs.make_identity_dict(range(256))
 decoding_map.update({
         0x78: "abc", # 1-n decoding mapping
-        str8("abc"): 0x0078,# 1-n encoding mapping
+        str8(b"abc"): 0x0078,# 1-n encoding mapping
         0x01: None,   # decoding mapping to <undefined>
         0x79: "",    # decoding mapping to <remove character>
 })
Index: Lib/test/test_builtin.py
===================================================================
--- Lib/test/test_builtin.py    (Revision 58552)
+++ Lib/test/test_builtin.py    (Arbeitskopie)
@@ -580,7 +580,8 @@
         self.assertEqual(hash(1), hash(1))
         self.assertEqual(hash(1), hash(1.0))
         hash('spam')
-        self.assertEqual(hash('spam'), hash(str8('spam')))
+        self.assertEqual(hash('spam'), hash(str8(b'spam'))) # remove str8()
+                                                    # when b"" is immutable
         hash((0,1,2,3))
         def f(): pass
         self.assertRaises(TypeError, hash, [])
Index: Lib/test/test_sys.py
===================================================================
--- Lib/test/test_sys.py        (Revision 58552)
+++ Lib/test/test_sys.py        (Arbeitskopie)
@@ -300,7 +300,7 @@
 
     def test_intern(self):
         self.assertRaises(TypeError, sys.intern)
-        s = str8("never interned before")
+        s = str8(b"never interned before")
         self.assert_(sys.intern(s) is s)
         s2 = s.swapcase().swapcase()
         self.assert_(sys.intern(s2) is s)
@@ -314,7 +314,7 @@
             def __hash__(self):
                 return 123
 
-        self.assertRaises(TypeError, sys.intern, S("abc"))
+        self.assertRaises(TypeError, sys.intern, S(b"abc"))
 
         s = "never interned as unicode before"
         self.assert_(sys.intern(s) is s)
Index: Lib/test/test_format.py
===================================================================
--- Lib/test/test_format.py     (Revision 58552)
+++ Lib/test/test_format.py     (Arbeitskopie)
@@ -40,7 +40,7 @@
                 print('yes')
 
 def testboth(formatstr, *args):
-    testformat(str8(formatstr), *args)
+    testformat(str8(formatstr, 'ascii'), *args)
     testformat(formatstr, *args)

_______________________________________________
Python-bugs-list mailing list 
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

[issue1303] adapt str8 constructor to bytes constructor

Reply via email to