Serhiy Storchaka added the comment:

A crash is possible not only when reading from text files, but also when the 
decoder returns a non-string or when the decoder's state is not a bytes object. 
This is possible with a malicious decoder, and perhaps with some old decoder in 
the stdlib codecs registry that is not a bytes-to-string decoder.

Here are patches for different versions.

----------
keywords: +patch
Added file: http://bugs.python.org/file28938/textio_type_check-2.7.patch
Added file: http://bugs.python.org/file28939/textio_type_check-3.3.patch
Added file: http://bugs.python.org/file28940/textio_type_check-3.2.patch

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue17106>
_______________________________________
diff -r c37ac05119ff Modules/_io/textio.c
--- a/Modules/_io/textio.c      Sat Feb 02 15:06:45 2013 -0800
+++ b/Modules/_io/textio.c      Sun Feb 03 10:41:09 2013 +0200
@@ -269,8 +269,9 @@
         return NULL;
 
     if (!PyUnicode_Check(output)) {
-        PyErr_SetString(PyExc_TypeError,
-                        "decoder should return a string result");
+        PyErr_Format(PyExc_TypeError,
+                     "decoder should return a string result, not '%.200s'",
+                     Py_TYPE(output)->tp_name);
         goto error;
     }
 
@@ -1417,7 +1418,12 @@
     Py_DECREF(chunk_size);
     if (input_chunk == NULL)
         goto fail;
-    assert(PyBytes_Check(input_chunk));
+    if (!PyBytes_Check(input_chunk)) {
+        PyErr_Format(PyExc_IOError,
+                     "underlying read1() should have returned a bytes object, "
+                     "not '%.200s'", Py_TYPE(input_chunk)->tp_name);
+        goto fail;
+    }
 
     eof = (PyBytes_Size(input_chunk) == 0);
 
@@ -1444,7 +1450,14 @@
         PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
         if (next_input == NULL)
             goto fail;
-        assert (PyBytes_Check(next_input));
+        if (!PyBytes_Check(next_input)) {
+            PyErr_Format(PyExc_TypeError,
+                         "decoder getstate() should have returned a bytes "
+                         "object, not '%.200s'",
+                         Py_TYPE(next_input)->tp_name);
+            Py_DECREF(next_input);
+            goto fail;
+        }
         Py_DECREF(dec_buffer);
         Py_CLEAR(self->snapshot);
         self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
@@ -2110,7 +2123,14 @@
         if (input_chunk == NULL)
             goto fail;
 
-        assert (PyBytes_Check(input_chunk));
+        if (!PyBytes_Check(input_chunk)) {
+            PyErr_Format(PyExc_IOError,
+                         "underlying read() should have returned a bytes "
+                         "object, not '%.200s'",
+                         Py_TYPE(input_chunk)->tp_name);
+            Py_DECREF(input_chunk);
+            goto fail;
+        }
 
         self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
         if (self->snapshot == NULL) {
@@ -2247,7 +2267,13 @@
             self->decoder, "decode", "s#", input, 1);
         if (decoded == NULL)
             goto fail;
-        assert (PyUnicode_Check(decoded));
+        if (!PyUnicode_Check(decoded)) {
+            PyErr_Format(PyExc_TypeError,
+                         "decoder should return a string result, not '%.200s'",
+                         Py_TYPE(decoded)->tp_name);
+            Py_DECREF(decoded);
+            goto fail;
+        }
         chars_decoded += PyUnicode_GET_SIZE(decoded);
         Py_DECREF(decoded);
 
@@ -2281,7 +2307,13 @@
             self->decoder, "decode", "si", "", /* final = */ 1);
         if (decoded == NULL)
             goto fail;
-        assert (PyUnicode_Check(decoded));
+        if (!PyUnicode_Check(decoded)) {
+            PyErr_Format(PyExc_TypeError,
+                         "decoder should return a string result, not '%.200s'",
+                         Py_TYPE(decoded)->tp_name);
+            Py_DECREF(decoded);
+            goto fail;
+        }
         chars_decoded += PyUnicode_GET_SIZE(decoded);
         Py_DECREF(decoded);
         cookie.need_eof = 1;
@@ -2440,7 +2472,7 @@
     Py_DECREF(res);
     if (r < 0)
         return NULL;
-    
+
     if (r > 0) {
         Py_RETURN_NONE; /* stream already closed */
     }
diff -r 3653d8174b0b Modules/_io/textio.c
--- a/Modules/_io/textio.c      Sat Feb 02 15:12:59 2013 -0800
+++ b/Modules/_io/textio.c      Sun Feb 03 10:42:13 2013 +0200
@@ -290,8 +290,9 @@
         return NULL;
 
     if (!PyUnicode_Check(output)) {
-        PyErr_SetString(PyExc_TypeError,
-                        "decoder should return a string result");
+        PyErr_Format(PyExc_TypeError,
+                     "decoder should return a string result, not '%.200s'",
+                     Py_TYPE(output)->tp_name);
         goto error;
     }
 
@@ -1458,7 +1459,13 @@
     Py_DECREF(chunk_size);
     if (input_chunk == NULL)
         goto fail;
-    assert(PyBytes_Check(input_chunk));
+    if (!PyBytes_Check(input_chunk)) {
+        PyErr_Format(PyExc_IOError,
+                     "underlying %s() should have returned a bytes object, "
+                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
+                     Py_TYPE(input_chunk)->tp_name);
+        goto fail;
+    }
 
     nbytes = PyBytes_Size(input_chunk);
     eof = (nbytes == 0);
@@ -1493,7 +1500,14 @@
         PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
         if (next_input == NULL)
             goto fail;
-        assert (PyBytes_Check(next_input));
+        if (!PyBytes_Check(next_input)) {
+            PyErr_Format(PyExc_TypeError,
+                         "decoder getstate() should have returned a bytes "
+                         "object, not '%.200s'",
+                         Py_TYPE(next_input)->tp_name);
+            Py_DECREF(next_input);
+            goto fail;
+        }
         Py_DECREF(dec_buffer);
         Py_CLEAR(self->snapshot);
         self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
@@ -2151,7 +2165,14 @@
         if (input_chunk == NULL)
             goto fail;
 
-        assert (PyBytes_Check(input_chunk));
+        if (!PyBytes_Check(input_chunk)) {
+            PyErr_Format(PyExc_IOError,
+                         "underlying read() should have returned a bytes "
+                         "object, not '%.200s'",
+                         Py_TYPE(input_chunk)->tp_name);
+            Py_DECREF(input_chunk);
+            goto fail;
+        }
 
         self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
         if (self->snapshot == NULL) {
@@ -2283,13 +2304,18 @@
         Py_DECREF(_state); \
     } while (0)
 
-    /* TODO: replace assert with exception */
 #define DECODER_DECODE(start, len, res) do { \
         PyObject *_decoded = _PyObject_CallMethodId( \
             self->decoder, &PyId_decode, "y#", start, len); \
         if (_decoded == NULL) \
             goto fail; \
-        assert (PyUnicode_Check(_decoded)); \
+        if (!PyUnicode_Check(_decoded)) { \
+            PyErr_Format(PyExc_TypeError, \
+                         "decoder should return a string result, not '%.200s'", \
+                         Py_TYPE(_decoded)->tp_name); \
+            Py_DECREF(_decoded); \
+            goto fail; \
+        } \
         res = PyUnicode_GET_LENGTH(_decoded); \
         Py_DECREF(_decoded); \
     } while (0)
@@ -2372,7 +2398,13 @@
             self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
         if (decoded == NULL)
             goto fail;
-        assert (PyUnicode_Check(decoded));
+        if (!PyUnicode_Check(decoded)) {
+            PyErr_Format(PyExc_TypeError,
+                         "decoder should return a string result, not '%.200s'",
+                         Py_TYPE(decoded)->tp_name);
+            Py_DECREF(decoded);
+            goto fail;
+        }
         chars_decoded += PyUnicode_GET_LENGTH(decoded);
         Py_DECREF(decoded);
         cookie.need_eof = 1;
diff -r 6c9f4c22fd81 Modules/_io/textio.c
--- a/Modules/_io/textio.c      Sat Feb 02 15:08:52 2013 -0800
+++ b/Modules/_io/textio.c      Sun Feb 03 10:37:22 2013 +0200
@@ -269,8 +269,9 @@
         return NULL;
 
     if (!PyUnicode_Check(output)) {
-        PyErr_SetString(PyExc_TypeError,
-                        "decoder should return a string result");
+        PyErr_Format(PyExc_TypeError,
+                     "decoder should return a string result, not '%.200s'",
+                     Py_TYPE(output)->tp_name);
         goto error;
     }
 
@@ -1454,7 +1455,13 @@
     Py_DECREF(chunk_size);
     if (input_chunk == NULL)
         goto fail;
-    assert(PyBytes_Check(input_chunk));
+    if (!PyBytes_Check(input_chunk)) {
+        PyErr_Format(PyExc_IOError,
+                     "underlying %s() should have returned a bytes object, "
+                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
+                     Py_TYPE(input_chunk)->tp_name);
+        goto fail;
+    }
 
     eof = (PyBytes_Size(input_chunk) == 0);
 
@@ -1481,7 +1488,14 @@
         PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
         if (next_input == NULL)
             goto fail;
-        assert (PyBytes_Check(next_input));
+        if (!PyBytes_Check(next_input)) {
+            PyErr_Format(PyExc_TypeError,
+                         "decoder getstate() should have returned a bytes "
+                         "object, not '%.200s'",
+                         Py_TYPE(next_input)->tp_name);
+            Py_DECREF(next_input);
+            goto fail;
+        }
         Py_DECREF(dec_buffer);
         Py_CLEAR(self->snapshot);
         self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
@@ -2123,7 +2137,14 @@
         if (input_chunk == NULL)
             goto fail;
 
-        assert (PyBytes_Check(input_chunk));
+        if (!PyBytes_Check(input_chunk)) {
+            PyErr_Format(PyExc_IOError,
+                         "underlying read() should have returned a bytes "
+                         "object, not '%.200s'",
+                         Py_TYPE(input_chunk)->tp_name);
+            Py_DECREF(input_chunk);
+            goto fail;
+        }
 
         self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
         if (self->snapshot == NULL) {
@@ -2259,7 +2280,13 @@
             self->decoder, "decode", "y#", input, 1);
         if (decoded == NULL)
             goto fail;
-        assert (PyUnicode_Check(decoded));
+        if (!PyUnicode_Check(decoded)) {
+            PyErr_Format(PyExc_TypeError,
+                         "decoder should return a string result, not '%.200s'",
+                         Py_TYPE(decoded)->tp_name);
+            Py_DECREF(decoded);
+            goto fail;
+        }
         chars_decoded += PyUnicode_GET_SIZE(decoded);
         Py_DECREF(decoded);
 
@@ -2293,7 +2320,13 @@
             self->decoder, "decode", "yi", "", /* final = */ 1);
         if (decoded == NULL)
             goto fail;
-        assert (PyUnicode_Check(decoded));
+        if (!PyUnicode_Check(decoded)) {
+            PyErr_Format(PyExc_TypeError,
+                         "decoder should return a string result, not '%.200s'",
+                         Py_TYPE(decoded)->tp_name);
+            Py_DECREF(decoded);
+            goto fail;
+        }
         chars_decoded += PyUnicode_GET_SIZE(decoded);
         Py_DECREF(decoded);
         cookie.need_eof = 1;
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to