Serhiy Storchaka added the comment:

> Who's trying (and failing) to encode the filename?

Windows. The file was created using the Unicode API and is stored UTF-16
encoded in NTFS. Windows fails to represent this filename using the ANSI API.

Here is a patch against 2.7 which always uses the Unicode API in listdir()
and tries to encode filenames to str if a str argument is used.

----------
keywords: +patch
Added file: http://bugs.python.org/file28319/listdir_unicode-2.7.patch

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue16656>
_______________________________________
diff -r 45cd2d816f4d Modules/posixmodule.c
--- a/Modules/posixmodule.c     Sun Jul 08 02:22:44 2012 -0700
+++ b/Modules/posixmodule.c     Fri Dec 14 22:48:53 2012 +0200
@@ -2099,120 +2099,75 @@
     PyObject *d, *v;
     HANDLE hFindFile;
     BOOL result;
-    WIN32_FIND_DATA FileData;
     char namebuf[MAX_PATH+5]; /* Overallocate for \\*.*\0 */
     char *bufptr = namebuf;
     Py_ssize_t len = sizeof(namebuf)-5; /* only claim to have space for MAX_PATH */
-
     PyObject *po;
-    if (PyArg_ParseTuple(args, "U:listdir", &po)) {
-        WIN32_FIND_DATAW wFileData;
-        Py_UNICODE *wnamebuf;
-        /* Overallocate for \\*.*\0 */
-        len = PyUnicode_GET_SIZE(po);
-        wnamebuf = malloc((len + 5) * sizeof(wchar_t));
-        if (!wnamebuf) {
-            PyErr_NoMemory();
+    PyObject *decoded = NULL;
+    WIN32_FIND_DATAW wFileData;
+    Py_UNICODE *wnamebuf;
+
+    if (!PyArg_ParseTuple(args, "U:listdir", &po)) {
+        /* Drop the argument parsing error as narrow strings
+           are also valid. */
+        PyErr_Clear();
+        if (!PyArg_ParseTuple(args, "et#:listdir",
+                              Py_FileSystemDefaultEncoding, &bufptr, &len))
             return NULL;
-        }
-        wcscpy(wnamebuf, PyUnicode_AS_UNICODE(po));
-        if (len > 0) {
-            Py_UNICODE wch = wnamebuf[len-1];
-            if (wch != L'/' && wch != L'\\' && wch != L':')
-                wnamebuf[len++] = L'\\';
-            wcscpy(wnamebuf + len, L"*.*");
-        }
-        if ((d = PyList_New(0)) == NULL) {
-            free(wnamebuf);
+        po = decoded = PyUnicode_Decode(bufptr, len,
+                                        Py_FileSystemDefaultEncoding, NULL);
+        if (po == NULL)
             return NULL;
-        }
-        Py_BEGIN_ALLOW_THREADS
-        hFindFile = FindFirstFileW(wnamebuf, &wFileData);
-        Py_END_ALLOW_THREADS
-        if (hFindFile == INVALID_HANDLE_VALUE) {
-            int error = GetLastError();
-            if (error == ERROR_FILE_NOT_FOUND) {
-                free(wnamebuf);
-                return d;
-            }
-            Py_DECREF(d);
-            win32_error_unicode("FindFirstFileW", wnamebuf);
-            free(wnamebuf);
-            return NULL;
-        }
-        do {
-            /* Skip over . and .. */
-            if (wcscmp(wFileData.cFileName, L".") != 0 &&
-                wcscmp(wFileData.cFileName, L"..") != 0) {
-                v = PyUnicode_FromUnicode(wFileData.cFileName, 
wcslen(wFileData.cFileName));
-                if (v == NULL) {
-                    Py_DECREF(d);
-                    d = NULL;
-                    break;
-                }
-                if (PyList_Append(d, v) != 0) {
-                    Py_DECREF(v);
-                    Py_DECREF(d);
-                    d = NULL;
-                    break;
-                }
-                Py_DECREF(v);
-            }
-            Py_BEGIN_ALLOW_THREADS
-            result = FindNextFileW(hFindFile, &wFileData);
-            Py_END_ALLOW_THREADS
-            /* FindNextFile sets error to ERROR_NO_MORE_FILES if
-               it got to the end of the directory. */
-            if (!result && GetLastError() != ERROR_NO_MORE_FILES) {
-                Py_DECREF(d);
-                win32_error_unicode("FindNextFileW", wnamebuf);
-                FindClose(hFindFile);
-                free(wnamebuf);
-                return NULL;
-            }
-        } while (result == TRUE);
-
-        if (FindClose(hFindFile) == FALSE) {
-            Py_DECREF(d);
-            win32_error_unicode("FindClose", wnamebuf);
-            free(wnamebuf);
-            return NULL;
-        }
+    }
+
+    /* Overallocate for \\*.*\0 */
+    len = PyUnicode_GET_SIZE(po);
+    wnamebuf = malloc((len + 5) * sizeof(wchar_t));
+    if (!wnamebuf) {
+        Py_XDECREF(decoded);
+        PyErr_NoMemory();
+        return NULL;
+    }
+    wcscpy(wnamebuf, PyUnicode_AS_UNICODE(po));
+    if (len > 0) {
+        Py_UNICODE wch = wnamebuf[len-1];
+        if (wch != L'/' && wch != L'\\' && wch != L':')
+            wnamebuf[len++] = L'\\';
+        wcscpy(wnamebuf + len, L"*.*");
+    }
+    if ((d = PyList_New(0)) == NULL) {
         free(wnamebuf);
-        return d;
-    }
-    /* Drop the argument parsing error as narrow strings
-       are also valid. */
-    PyErr_Clear();
-
-    if (!PyArg_ParseTuple(args, "et#:listdir",
-                          Py_FileSystemDefaultEncoding, &bufptr, &len))
-        return NULL;
-    if (len > 0) {
-        char ch = namebuf[len-1];
-        if (ch != SEP && ch != ALTSEP && ch != ':')
-            namebuf[len++] = '/';
-        strcpy(namebuf + len, "*.*");
-    }
-
-    if ((d = PyList_New(0)) == NULL)
-        return NULL;
-
+        Py_XDECREF(decoded);
+        return NULL;
+    }
     Py_BEGIN_ALLOW_THREADS
-    hFindFile = FindFirstFile(namebuf, &FileData);
+    hFindFile = FindFirstFileW(wnamebuf, &wFileData);
     Py_END_ALLOW_THREADS
     if (hFindFile == INVALID_HANDLE_VALUE) {
         int error = GetLastError();
-        if (error == ERROR_FILE_NOT_FOUND)
+        if (error == ERROR_FILE_NOT_FOUND) {
+            free(wnamebuf);
+            Py_XDECREF(decoded);
             return d;
+        }
         Py_DECREF(d);
-        return win32_error("FindFirstFile", namebuf);
+        win32_error_unicode("FindFirstFileW", wnamebuf);
+        free(wnamebuf);
+        Py_XDECREF(decoded);
+        return NULL;
     }
     do {
         /* Skip over . and .. */
-        if (strcmp(FileData.cFileName, ".") != 0 &&
-            strcmp(FileData.cFileName, "..") != 0) {
-            v = PyString_FromString(FileData.cFileName);
+        if (wcscmp(wFileData.cFileName, L".") != 0 &&
+            wcscmp(wFileData.cFileName, L"..") != 0) {
+            len = wcslen(wFileData.cFileName);
+            v = NULL;  /* str argument: prefer a str result, see fallback */
+            if (decoded) {
+                v = PyUnicode_Encode(wFileData.cFileName, len,
+                                     Py_FileSystemDefaultEncoding, NULL);
+                if (v == NULL)
+                    PyErr_Clear();  /* not encodable: return unicode instead */
+            }
+            if (v == NULL)
+                v = PyUnicode_FromUnicode(wFileData.cFileName, len);
             if (v == NULL) {
                 Py_DECREF(d);
                 d = NULL;
@@ -2227,23 +2181,29 @@
             Py_DECREF(v);
         }
         Py_BEGIN_ALLOW_THREADS
-        result = FindNextFile(hFindFile, &FileData);
+        result = FindNextFileW(hFindFile, &wFileData);
         Py_END_ALLOW_THREADS
         /* FindNextFile sets error to ERROR_NO_MORE_FILES if
            it got to the end of the directory. */
         if (!result && GetLastError() != ERROR_NO_MORE_FILES) {
             Py_DECREF(d);
-            win32_error("FindNextFile", namebuf);
+            win32_error_unicode("FindNextFileW", wnamebuf);
             FindClose(hFindFile);
+            free(wnamebuf);
+            Py_XDECREF(decoded);
             return NULL;
         }
     } while (result == TRUE);
 
     if (FindClose(hFindFile) == FALSE) {
         Py_DECREF(d);
-        return win32_error("FindClose", namebuf);
-    }
-
+        win32_error_unicode("FindClose", wnamebuf);
+        free(wnamebuf);
+        Py_XDECREF(decoded);
+        return NULL;
+    }
+    free(wnamebuf);
+    Py_XDECREF(decoded);
     return d;
 
 #elif defined(PYOS_OS2)
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to