New submission from Georg Brandl:
This makes the str8 constructor accept the same kinds of types as the
bytes constructor. I had to fix instances of str8("abc") to str8(b"abc")
to make tests pass again. The only remaining failure should be test_str
-- the string test suite must be thoroughly redesigned to fit all three
string-like types.
--
assignee: gvanrossum
components: Interpreter Core
files: str_constructor.diff
keywords: py3k
messages: 56572
nosy: georg.brandl, gvanrossum
severity: normal
status: open
title: adapt str8 constructor to bytes constructor
versions: Python 3.0
Added file: http://bugs.python.org/file8570/str_constructor.diff
__
Tracker [EMAIL PROTECTED]
http://bugs.python.org/issue1303
__
Index: Objects/stringobject.c
===
--- Objects/stringobject.c (Revision 58552)
+++ Objects/stringobject.c (Arbeitskopie)
@@ -3020,16 +3020,149 @@
static PyObject *
string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
- PyObject *x = NULL;
- static char *kwlist[] = {object, 0};
+ PyObject *x = NULL, *it;
+ PyObject *(*iternext)(PyObject *);
+ const char *encoding = NULL;
+ const char *errors = NULL;
+ PyObject *new = NULL;
+ Py_ssize_t i, size;
+ static char *kwlist[] = {object, encoding, errors, 0};
if (type != PyString_Type)
return str_subtype_new(type, args, kwds);
- if (!PyArg_ParseTupleAndKeywords(args, kwds, |O:str8, kwlist, x))
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, |Oss:str8, kwlist, x,
+encoding, errors))
return NULL;
- if (x == NULL)
+ if (x == NULL) {
+ if (encoding != NULL || errors != NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ encoding or errors without sequence
+ argument);
+ return NULL;
+ }
return PyString_FromString();
- return PyObject_Str(x);
+ }
+
+ if (PyUnicode_Check(x)) {
+ /* Encode via the codec registry */
+ if (encoding == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ string argument without an encoding);
+ return NULL;
+ }
+ new = PyCodec_Encode(x, encoding, errors);
+ if (new == NULL)
+ return NULL;
+ /* XXX(gb): must accept bytes here since codecs output bytes
+ at the moment */
+ if (PyBytes_Check(new)) {
+ PyObject *str;
+ str = PyString_FromString(PyBytes_AsString(new));
+ Py_DECREF(new);
+ if (!str)
+ return NULL;
+ return str;
+ }
+ if (!PyString_Check(new)) {
+ PyErr_Format(PyExc_TypeError,
+encoder did not return a str8
+object (type=%.400s),
+Py_Type(new)-tp_name);
+ Py_DECREF(new);
+ return NULL;
+ }
+ return new;
+ }
+
+ /* If it's not unicode, there can't be encoding or errors */
+ if (encoding != NULL || errors != NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ encoding or errors without a string argument);
+ return NULL;
+ }
+
+ /* Use the modern buffer interface */
+ if (PyObject_CheckBuffer(x)) {
+ Py_buffer view;
+ if (PyObject_GetBuffer(x, view, PyBUF_FULL_RO) 0)
+ return NULL;
+ new = PyString_FromStringAndSize(NULL, view.len);
+ if (!new)
+ goto fail;
+ if (PyBuffer_ToContiguous(((PyStringObject *)new)-ob_sval,
+ view, view.len, 'C') 0)
+ goto fail;
+ PyObject_ReleaseBuffer(x, view);
+ return new;
+ fail:
+ Py_XDECREF(new);
+ PyObject_ReleaseBuffer(x, view);
+ return NULL;
+ }
+
+ /* For the iterator version, create a string object and resize as
needed. */
+ /* XXX(gb): is 64 a good value? also, optimize this if length is known
*/
+ size = 64;
+ new = PyString_FromStringAndSize(NULL, size);
+ if (new == NULL)
+ return NULL;
+
+ /* XXX Optimize this if the arguments is a list, tuple */
+
+ /* Get the iterator */
+ it = PyObject_GetIter(x);
+ if (it == NULL)
+