Serhiy Storchaka added the comment:
A crash is possible not only when reading from text files, but also when the
decoder returns a non-string or when the decoder's state is not a bytes object.
This is possible with a malicious decoder and perhaps with some old
non-bytes-to-string decoder in the stdlib codecs registry.
Here are patches for different versions.
----------
keywords: +patch
Added file: http://bugs.python.org/file28938/textio_type_check-2.7.patch
Added file: http://bugs.python.org/file28939/textio_type_check-3.3.patch
Added file: http://bugs.python.org/file28940/textio_type_check-3.2.patch
_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue17106>
_______________________________________
diff -r c37ac05119ff Modules/_io/textio.c
--- a/Modules/_io/textio.c Sat Feb 02 15:06:45 2013 -0800
+++ b/Modules/_io/textio.c Sun Feb 03 10:41:09 2013 +0200
@@ -269,8 +269,9 @@
return NULL;
if (!PyUnicode_Check(output)) {
- PyErr_SetString(PyExc_TypeError,
- "decoder should return a string result");
+ PyErr_Format(PyExc_TypeError,
+ "decoder should return a string result, not '%.200s'",
+ Py_TYPE(output)->tp_name);
goto error;
}
@@ -1417,7 +1418,12 @@
Py_DECREF(chunk_size);
if (input_chunk == NULL)
goto fail;
- assert(PyBytes_Check(input_chunk));
+ if (!PyBytes_Check(input_chunk)) {
+ PyErr_Format(PyExc_IOError,
+ "underlying read1() should have returned a bytes object, "
+ "not '%.200s'", Py_TYPE(input_chunk)->tp_name);
+ goto fail;
+ }
eof = (PyBytes_Size(input_chunk) == 0);
@@ -1444,7 +1450,14 @@
PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
if (next_input == NULL)
goto fail;
- assert (PyBytes_Check(next_input));
+ if (!PyBytes_Check(next_input)) {
+ PyErr_Format(PyExc_TypeError,
+ "decoder getstate() should have returned a bytes "
+ "object, not '%.200s'",
+ Py_TYPE(next_input)->tp_name);
+ Py_DECREF(next_input);
+ goto fail;
+ }
Py_DECREF(dec_buffer);
Py_CLEAR(self->snapshot);
self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
@@ -2110,7 +2123,14 @@
if (input_chunk == NULL)
goto fail;
- assert (PyBytes_Check(input_chunk));
+ if (!PyBytes_Check(input_chunk)) {
+ PyErr_Format(PyExc_IOError,
+ "underlying read() should have returned a bytes "
+ "object, not '%.200s'",
+ Py_TYPE(input_chunk)->tp_name);
+ Py_DECREF(input_chunk);
+ goto fail;
+ }
self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
if (self->snapshot == NULL) {
@@ -2247,7 +2267,13 @@
self->decoder, "decode", "s#", input, 1);
if (decoded == NULL)
goto fail;
- assert (PyUnicode_Check(decoded));
+ if (!PyUnicode_Check(decoded)) {
+ PyErr_Format(PyExc_TypeError,
+ "decoder should return a string result, not '%.200s'",
+ Py_TYPE(decoded)->tp_name);
+ Py_DECREF(decoded);
+ goto fail;
+ }
chars_decoded += PyUnicode_GET_SIZE(decoded);
Py_DECREF(decoded);
@@ -2281,7 +2307,13 @@
self->decoder, "decode", "si", "", /* final = */ 1);
if (decoded == NULL)
goto fail;
- assert (PyUnicode_Check(decoded));
+ if (!PyUnicode_Check(decoded)) {
+ PyErr_Format(PyExc_TypeError,
+ "decoder should return a string result, not '%.200s'",
+ Py_TYPE(decoded)->tp_name);
+ Py_DECREF(decoded);
+ goto fail;
+ }
chars_decoded += PyUnicode_GET_SIZE(decoded);
Py_DECREF(decoded);
cookie.need_eof = 1;
@@ -2440,7 +2472,7 @@
Py_DECREF(res);
if (r < 0)
return NULL;
-
+
if (r > 0) {
Py_RETURN_NONE; /* stream already closed */
}
diff -r 3653d8174b0b Modules/_io/textio.c
--- a/Modules/_io/textio.c Sat Feb 02 15:12:59 2013 -0800
+++ b/Modules/_io/textio.c Sun Feb 03 10:42:13 2013 +0200
@@ -290,8 +290,9 @@
return NULL;
if (!PyUnicode_Check(output)) {
- PyErr_SetString(PyExc_TypeError,
- "decoder should return a string result");
+ PyErr_Format(PyExc_TypeError,
+ "decoder should return a string result, not '%.200s'",
+ Py_TYPE(output)->tp_name);
goto error;
}
@@ -1458,7 +1459,13 @@
Py_DECREF(chunk_size);
if (input_chunk == NULL)
goto fail;
- assert(PyBytes_Check(input_chunk));
+ if (!PyBytes_Check(input_chunk)) {
+ PyErr_Format(PyExc_IOError,
+ "underlying %s() should have returned a bytes object, "
+ "not '%.200s'", (self->has_read1 ? "read1": "read"),
+ Py_TYPE(input_chunk)->tp_name);
+ goto fail;
+ }
nbytes = PyBytes_Size(input_chunk);
eof = (nbytes == 0);
@@ -1493,7 +1500,14 @@
PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
if (next_input == NULL)
goto fail;
- assert (PyBytes_Check(next_input));
+ if (!PyBytes_Check(next_input)) {
+ PyErr_Format(PyExc_TypeError,
+ "decoder getstate() should have returned a bytes "
+ "object, not '%.200s'",
+ Py_TYPE(next_input)->tp_name);
+ Py_DECREF(next_input);
+ goto fail;
+ }
Py_DECREF(dec_buffer);
Py_CLEAR(self->snapshot);
self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
@@ -2151,7 +2165,14 @@
if (input_chunk == NULL)
goto fail;
- assert (PyBytes_Check(input_chunk));
+ if (!PyBytes_Check(input_chunk)) {
+ PyErr_Format(PyExc_IOError,
+ "underlying read() should have returned a bytes "
+ "object, not '%.200s'",
+ Py_TYPE(input_chunk)->tp_name);
+ Py_DECREF(input_chunk);
+ goto fail;
+ }
self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
if (self->snapshot == NULL) {
@@ -2283,13 +2304,18 @@
Py_DECREF(_state); \
} while (0)
- /* TODO: replace assert with exception */
#define DECODER_DECODE(start, len, res) do { \
PyObject *_decoded = _PyObject_CallMethodId( \
self->decoder, &PyId_decode, "y#", start, len); \
if (_decoded == NULL) \
goto fail; \
- assert (PyUnicode_Check(_decoded)); \
+ if (!PyUnicode_Check(_decoded)) { \
+ PyErr_Format(PyExc_TypeError, \
+ "decoder should return a string result, not '%.200s'", \
+ Py_TYPE(_decoded)->tp_name); \
+ Py_DECREF(_decoded); \
+ goto fail; \
+ } \
res = PyUnicode_GET_LENGTH(_decoded); \
Py_DECREF(_decoded); \
} while (0)
@@ -2372,7 +2398,13 @@
self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
if (decoded == NULL)
goto fail;
- assert (PyUnicode_Check(decoded));
+ if (!PyUnicode_Check(decoded)) {
+ PyErr_Format(PyExc_TypeError,
+ "decoder should return a string result, not '%.200s'",
+ Py_TYPE(decoded)->tp_name);
+ Py_DECREF(decoded);
+ goto fail;
+ }
chars_decoded += PyUnicode_GET_LENGTH(decoded);
Py_DECREF(decoded);
cookie.need_eof = 1;
diff -r 6c9f4c22fd81 Modules/_io/textio.c
--- a/Modules/_io/textio.c Sat Feb 02 15:08:52 2013 -0800
+++ b/Modules/_io/textio.c Sun Feb 03 10:37:22 2013 +0200
@@ -269,8 +269,9 @@
return NULL;
if (!PyUnicode_Check(output)) {
- PyErr_SetString(PyExc_TypeError,
- "decoder should return a string result");
+ PyErr_Format(PyExc_TypeError,
+ "decoder should return a string result, not '%.200s'",
+ Py_TYPE(output)->tp_name);
goto error;
}
@@ -1454,7 +1455,13 @@
Py_DECREF(chunk_size);
if (input_chunk == NULL)
goto fail;
- assert(PyBytes_Check(input_chunk));
+ if (!PyBytes_Check(input_chunk)) {
+ PyErr_Format(PyExc_IOError,
+ "underlying %s() should have returned a bytes object, "
+ "not '%.200s'", (self->has_read1 ? "read1": "read"),
+ Py_TYPE(input_chunk)->tp_name);
+ goto fail;
+ }
eof = (PyBytes_Size(input_chunk) == 0);
@@ -1481,7 +1488,14 @@
PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
if (next_input == NULL)
goto fail;
- assert (PyBytes_Check(next_input));
+ if (!PyBytes_Check(next_input)) {
+ PyErr_Format(PyExc_TypeError,
+ "decoder getstate() should have returned a bytes "
+ "object, not '%.200s'",
+ Py_TYPE(next_input)->tp_name);
+ Py_DECREF(next_input);
+ goto fail;
+ }
Py_DECREF(dec_buffer);
Py_CLEAR(self->snapshot);
self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
@@ -2123,7 +2137,14 @@
if (input_chunk == NULL)
goto fail;
- assert (PyBytes_Check(input_chunk));
+ if (!PyBytes_Check(input_chunk)) {
+ PyErr_Format(PyExc_IOError,
+ "underlying read() should have returned a bytes "
+ "object, not '%.200s'",
+ Py_TYPE(input_chunk)->tp_name);
+ Py_DECREF(input_chunk);
+ goto fail;
+ }
self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
if (self->snapshot == NULL) {
@@ -2259,7 +2280,13 @@
self->decoder, "decode", "y#", input, 1);
if (decoded == NULL)
goto fail;
- assert (PyUnicode_Check(decoded));
+ if (!PyUnicode_Check(decoded)) {
+ PyErr_Format(PyExc_TypeError,
+ "decoder should return a string result, not '%.200s'",
+ Py_TYPE(decoded)->tp_name);
+ Py_DECREF(decoded);
+ goto fail;
+ }
chars_decoded += PyUnicode_GET_SIZE(decoded);
Py_DECREF(decoded);
@@ -2293,7 +2320,13 @@
self->decoder, "decode", "yi", "", /* final = */ 1);
if (decoded == NULL)
goto fail;
- assert (PyUnicode_Check(decoded));
+ if (!PyUnicode_Check(decoded)) {
+ PyErr_Format(PyExc_TypeError,
+ "decoder should return a string result, not '%.200s'",
+ Py_TYPE(decoded)->tp_name);
+ Py_DECREF(decoded);
+ goto fail;
+ }
chars_decoded += PyUnicode_GET_SIZE(decoded);
Py_DECREF(decoded);
cookie.need_eof = 1;
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com