Alexandre Vassalotti added the comment:
I don't think it's possible to add shortcuts in PyUnicode_Decode for
UTF-16 and UTF-32 because the byte order can differ depending on
the platform. So, these two need to pass through the codecs module.
I am not sure if it's better, but I factored out the normalization routine
into its own function.
Added file: http://bugs.python.org/file8589/py3k_profile_fix-3.patch
__________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1302>
__________________________________
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c (revision 58587)
+++ Objects/unicodeobject.c (working copy)
@@ -1049,29 +1049,55 @@
return NULL;
}
+/* Return a new PyMem_Malloc'ed copy of enc with every space replaced by '-'
+   and every other character lower-cased; NULL if the allocation fails. */
+static char *
+normalize(const char *enc)
+{
+ size_t i, len = strlen(enc);
+ char *p = PyMem_Malloc(len + 1);
+ if (p == NULL)
+ return NULL;
+ for (i = 0; i < len; i++) {
+ /* Store the converted character in the copy (it was previously discarded). */
+ if (enc[i] == ' ')
+ p[i] = '-';
+ else
+ p[i] = tolower(Py_CHARMASK(enc[i]));
+ }
+ /* The caller owns the result and must release it with PyMem_Free. */
+ p[len] = '\0';
+ return p;
+}
+
PyObject *PyUnicode_Decode(const char *s,
- Py_ssize_t size,
- const char *encoding,
- const char *errors)
+ Py_ssize_t size,
+ const char *encoding,
+ const char *errors)
{
PyObject *buffer = NULL, *unicode;
Py_buffer info;
+ char *enc;
if (encoding == NULL)
- encoding = PyUnicode_GetDefaultEncoding();
+ encoding = PyUnicode_GetDefaultEncoding();
+ enc = normalize(encoding);
+
/* Shortcuts for common default encodings */
- if (strcmp(encoding, "utf-8") == 0)
+ if (strcmp(enc, "utf-8") == 0)
return PyUnicode_DecodeUTF8(s, size, errors);
- else if (strcmp(encoding, "latin-1") == 0)
+ else if (strcmp(enc, "latin-1") == 0)
return PyUnicode_DecodeLatin1(s, size, errors);
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
- else if (strcmp(encoding, "mbcs") == 0)
+ else if (strcmp(enc, "mbcs") == 0)
return PyUnicode_DecodeMBCS(s, size, errors);
#endif
- else if (strcmp(encoding, "ascii") == 0)
+ else if (strcmp(enc, "ascii") == 0)
return PyUnicode_DecodeASCII(s, size, errors);
+ PyMem_Free(enc);
+
/* Decode via the codec registry */
buffer = NULL;
if (PyBuffer_FillInfo(&info, (void *)s, size, 1, PyBUF_SIMPLE) < 0)
Index: Lib/test/regrtest.py
===================================================================
--- Lib/test/regrtest.py (revision 58587)
+++ Lib/test/regrtest.py (working copy)
@@ -1119,6 +1119,15 @@
if not os.path.supports_unicode_filenames:
self.expected.add('test_pep277')
+ # doctest, profile and cProfile tests fail when the encoding
+ # of the filesystem is not built-in, because of the extra calls
+ # to the codecs module.
+ builtin_enc = ("utf-8", "latin-1", "ascii", "mbcs")
+ if sys.getfilesystemencoding().lower() not in builtin_enc:
+ self.expected.add('test_profile')
+ self.expected.add('test_cProfile')
+ self.expected.add('test_doctest')
+
try:
from test import test_socket_ssl
except ImportError:
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com