Alexandre Vassalotti added the comment:

I don't think it's possible to add shortcuts in PyUnicode_Decode for
UTF-16 and UTF-32 because the byte order can differ depending on
the platform. So, these two need to pass through the codecs module.

I am not sure if it's better, but I factored out the normalization routine
into its own function.

Added file: http://bugs.python.org/file8589/py3k_profile_fix-3.patch

__________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1302>
__________________________________
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(revision 58587)
+++ Objects/unicodeobject.c	(working copy)
@@ -1049,29 +1049,55 @@
     return NULL;
 }
 
+/* Return a newly allocated, normalized copy of the encoding name `enc`:
+   spaces become hyphens and every other character is lower-cased.
+   Returns NULL if the allocation fails; free with PyMem_Free(). */
+static char *
+normalize(const char *enc)
+{
+    size_t i;
+    size_t len = strlen(enc);
+    char *p = PyMem_Malloc(len + 1);
+
+    if (p == NULL)
+        return NULL;
+    for (i = 0; i < len; i++) {
+        char ch = enc[i];
+        /* Store into the copy, not just the local, so p is filled in. */
+        p[i] = (ch == ' ') ? '-' : (char)tolower(Py_CHARMASK(ch));
+    }
+    p[i] = '\0';
+    return p;
+}
+
 PyObject *PyUnicode_Decode(const char *s,
-			   Py_ssize_t size,
-			   const char *encoding,
-			   const char *errors)
+                           Py_ssize_t size,
+                           const char *encoding,
+                           const char *errors)
 {
     PyObject *buffer = NULL, *unicode;
     Py_buffer info;
+    char *enc;
 
     if (encoding == NULL)
-	encoding = PyUnicode_GetDefaultEncoding();
+        encoding = PyUnicode_GetDefaultEncoding();
 
+    enc = normalize(encoding);
+
     /* Shortcuts for common default encodings */
-    if (strcmp(encoding, "utf-8") == 0)
+    if (strcmp(enc, "utf-8") == 0)
         return PyUnicode_DecodeUTF8(s, size, errors);
-    else if (strcmp(encoding, "latin-1") == 0)
+    else if (strcmp(enc, "latin-1") == 0)
         return PyUnicode_DecodeLatin1(s, size, errors);
 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
-    else if (strcmp(encoding, "mbcs") == 0)
+    else if (strcmp(enc, "mbcs") == 0)
         return PyUnicode_DecodeMBCS(s, size, errors);
 #endif
-    else if (strcmp(encoding, "ascii") == 0)
+    else if (strcmp(enc, "ascii") == 0)
         return PyUnicode_DecodeASCII(s, size, errors);
 
+    PyMem_Free(enc);
+
     /* Decode via the codec registry */
     buffer = NULL;
     if (PyBuffer_FillInfo(&info, (void *)s, size, 1, PyBUF_SIMPLE) < 0)
Index: Lib/test/regrtest.py
===================================================================
--- Lib/test/regrtest.py	(revision 58587)
+++ Lib/test/regrtest.py	(working copy)
@@ -1119,6 +1119,15 @@
             if not os.path.supports_unicode_filenames:
                 self.expected.add('test_pep277')
 
+            # doctest, profile and cProfile tests fail when the encoding
+            # of the filesystem is not built-in, because of the extra calls
+            # to the codecs module.
+            builtin_enc = ("utf-8", "latin-1", "ascii", "mbcs")
+            if sys.getfilesystemencoding().lower() not in builtin_enc:
+                self.expected.add('test_profile')
+                self.expected.add('test_cProfile')
+                self.expected.add('test_doctest')
+
             try:
                 from test import test_socket_ssl
             except ImportError:
_______________________________________________
Python-bugs-list mailing list 
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to