New submission from Christian Heimes:

I'm sending the patch in for review.

----------
components: Interpreter Core
files: py3k_file_fsenc2.patch
messages: 56374
nosy: tiran
severity: normal
status: open
title: Decode __file__ and co_filename to unicode using fs default
versions: Python 3.0

__________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1272>
__________________________________
Index: Python/pythonrun.c
===================================================================
--- Python/pythonrun.c	(revision 58412)
+++ Python/pythonrun.c	(working copy)
@@ -867,7 +867,8 @@
 		return -1;
 	d = PyModule_GetDict(m);
 	if (PyDict_GetItemString(d, "__file__") == NULL) {
-		PyObject *f = PyString_FromString(filename);
+		PyObject *f;
+		f = PyUnicode_DecodeFSDefault(filename, 0, NULL);
 		if (f == NULL)
 			return -1;
 		if (PyDict_SetItemString(d, "__file__", f) < 0) {
Index: Python/import.c
===================================================================
--- Python/import.c	(revision 58412)
+++ Python/import.c	(working copy)
@@ -652,7 +652,7 @@
 	/* Remember the filename as the __file__ attribute */
 	v = NULL;
 	if (pathname != NULL) {
-		v = PyString_FromString(pathname);
+		v = PyUnicode_DecodeFSDefault(pathname, 0, NULL);
 		if (v == NULL)
 			PyErr_Clear();
 	}
@@ -983,7 +983,7 @@
 		PySys_WriteStderr("import %s # directory %s\n",
 			name, pathname);
 	d = PyModule_GetDict(m);
-	file = PyString_FromString(pathname);
+	file = PyUnicode_DecodeFSDefault(pathname, 0, NULL);
 	if (file == NULL)
 		goto error;
 	path = Py_BuildValue("[O]", file);
Index: Python/compile.c
===================================================================
--- Python/compile.c	(revision 58412)
+++ Python/compile.c	(working copy)
@@ -4001,7 +4001,7 @@
 	freevars = dict_keys_inorder(c->u->u_freevars, PyTuple_Size(cellvars));
 	if (!freevars)
 	    goto error;
-	filename = PyString_FromString(c->c_filename);
+	filename = PyUnicode_DecodeFSDefault(c->c_filename, 0, NULL);
 	if (!filename)
 		goto error;
 
Index: Python/importdl.c
===================================================================
--- Python/importdl.c	(revision 58412)
+++ Python/importdl.c	(working copy)
@@ -62,7 +62,9 @@
 		return NULL;
 	}
 	/* Remember the filename as the __file__ attribute */
-	if (PyModule_AddStringConstant(m, "__file__", pathname) < 0)
+	PyObject *path;
+	path = PyUnicode_DecodeFSDefault(pathname, 0, NULL);
+	if (PyModule_AddObject(m, "__file__", path) < 0)
 		PyErr_Clear(); /* Not important enough to report */
 
 	if (_PyImport_FixupExtension(name, pathname) == NULL)
Index: Include/unicodeobject.h
===================================================================
--- Include/unicodeobject.h	(revision 58412)
+++ Include/unicodeobject.h	(working copy)
@@ -154,6 +154,7 @@
 # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
 # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
 # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
+# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault
 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
 # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
@@ -245,6 +246,7 @@
 # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
 # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
 # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
+# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault
 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
 # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
@@ -641,6 +643,25 @@
 PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
     PyObject *, const char *);
 
+/* Decode a string to a Python unicode object using either
+   Py_FileSystemDefaultEncoding or UTF-8 if the default encoding isn't given.
+
+   The function is intended to be used for paths and file names only. It
+   doesn't use the codecs module and PyUnicode_Decode() since it is required
+   during bootstrapping, before the codecs are set up. For that reason the
+   default fs encoding should be UTF-8, UTF-16, UTF-32, Latin-1, ASCII or MBCS.
+
+   If the length parameter is 0, the length of string is determined
+   with strlen(string). errors must be NULL.
+*/
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
+    const char *string, 	/* encoded string */
+    Py_ssize_t length,	 	/* size of string */
+    const char *errors		/* error handling */
+    );
+
+
 /* Return a char* holding the UTF-8 encoded value of the
    Unicode object.
 
Index: setup.py
===================================================================
--- setup.py	(revision 58412)
+++ setup.py	(working copy)
@@ -414,7 +414,6 @@
         # Python C API test module
         exts.append( Extension('_testcapi', ['_testcapimodule.c']) )
         # profilers (_lsprof is for cProfile.py)
-        exts.append( Extension('_hotshot', ['_hotshot.c']) )
         exts.append( Extension('_lsprof', ['_lsprof.c', 'rotatingtree.c']) )
         # static Unicode character database
         exts.append( Extension('unicodedata', ['unicodedata.c']) )
Index: Objects/codeobject.c
===================================================================
--- Objects/codeobject.c	(revision 58412)
+++ Objects/codeobject.c	(working copy)
@@ -59,7 +59,7 @@
 	    freevars == NULL || !PyTuple_Check(freevars) ||
 	    cellvars == NULL || !PyTuple_Check(cellvars) ||
 	    name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) ||
-	    filename == NULL || !PyString_Check(filename) ||
+	    filename == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) ||
 	    lnotab == NULL || !PyString_Check(lnotab) ||
 	    !PyObject_CheckReadBuffer(code)) {
 		PyErr_BadInternalCall();
@@ -72,6 +72,13 @@
 	} else {
 		Py_INCREF(name);
 	}
+	if (PyString_Check(filename)) {
+        filename = PyUnicode_DecodeFSDefault(PyString_AS_STRING(filename), 0, NULL);
+		if (filename == NULL)
+			return NULL;
+	} else {
+		Py_INCREF(filename);
+	}
 	intern_strings(names);
 	intern_strings(varnames);
 	intern_strings(freevars);
@@ -260,6 +267,8 @@
 		ourcellvars = PyTuple_New(0);
 	if (ourcellvars == NULL)
 		goto cleanup;
+    filename = PyUnicode_DecodeFSDefault(PyString_AS_STRING(filename),
+                                         0, NULL);
 
 	co = (PyObject *)PyCode_New(argcount, kwonlyargcount,
 				    nlocals, stacksize, flags,
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(revision 58412)
+++ Objects/unicodeobject.c	(working copy)
@@ -1231,6 +1231,61 @@
     return v;
 }
 
+/* Decode a file-system path using Py_FileSystemDefaultEncoding (UTF-8 when
+   unset) without the codecs machinery.  errors must be NULL.  A failed or
+   unsupported decode falls back to UTF-8 with "replace" error handling. */
+PyObject*
+PyUnicode_DecodeFSDefault(const char *string, Py_ssize_t length,
+                          const char *errors)
+{
+    PyObject *v = NULL;
+    char encoding[32], mangled[32], *encptr, *manptr;
+    unsigned char tmp;
+
+    if (errors != NULL)
+        Py_FatalError("non-NULL errors in PyUnicode_DecodeFSDefault");
+    if ((length == 0) && *string)
+        length = (Py_ssize_t)strlen(string);
+
+    strncpy(encoding, Py_FileSystemDefaultEncoding ?
+            Py_FileSystemDefaultEncoding : "UTF-8", 31);
+    encoding[31] = '\0';  /* strncpy does not terminate on truncation */
+
+    /* Normalize the name: lower-case it and drop non-alphanumerics ('-'). */
+    manptr = mangled;
+    for (encptr = encoding; *encptr; encptr++) {
+        /* unsigned char keeps the <ctype.h> calls defined for bytes > 127 */
+        tmp = (unsigned char)tolower((unsigned char)*encptr);
+        if (isalnum(tmp))
+            *manptr++ = (char)tmp;
+    }
+    *manptr = '\0';
+
+    if (strcmp(mangled, "utf8") == 0)
+        v = PyUnicode_DecodeUTF8(string, length, NULL);
+    else if (strcmp(mangled, "utf16") == 0)
+        v = PyUnicode_DecodeUTF16(string, length, NULL, 0);
+    else if (strcmp(mangled, "utf32") == 0)
+        v = PyUnicode_DecodeUTF32(string, length, NULL, 0);
+    else if ((strcmp(mangled, "latin1") == 0) ||
+             (strcmp(mangled, "iso88591") == 0) ||
+             (strcmp(mangled, "iso885915") == 0))  /* Latin-9: approximate */
+        v = PyUnicode_DecodeLatin1(string, length, NULL);
+    else if (strcmp(mangled, "ascii") == 0)
+        v = PyUnicode_DecodeASCII(string, length, NULL);
+#ifdef MS_WIN32
+    else if (strcmp(mangled, "mbcs") == 0)
+        v = PyUnicode_DecodeMBCS(string, length, NULL);
+#endif
+
+    if (v == NULL) {
+        /* Drop the strict decoder's exception before the lossy retry. */
+        PyErr_Clear();
+        v = PyUnicode_DecodeUTF8(string, length, "replace");
+    }
+    return v;
+}
+
 char*
 PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
 {
Index: Objects/moduleobject.c
===================================================================
--- Objects/moduleobject.c	(revision 58412)
+++ Objects/moduleobject.c	(working copy)
@@ -86,12 +86,12 @@
 	d = ((PyModuleObject *)m)->md_dict;
 	if (d == NULL ||
 	    (fileobj = PyDict_GetItemString(d, "__file__")) == NULL ||
-	    !PyString_Check(fileobj))
+	    !PyUnicode_Check(fileobj))
 	{
 		PyErr_SetString(PyExc_SystemError, "module filename missing");
 		return NULL;
 	}
-	return PyString_AsString(fileobj);
+	return PyUnicode_AsString(fileobj);
 }
 
 void
Index: Modules/pyexpat.c
===================================================================
--- Modules/pyexpat.c	(revision 58412)
+++ Modules/pyexpat.c	(working copy)
@@ -238,7 +238,7 @@
         nulltuple = PyTuple_New(0);
         if (nulltuple == NULL)
             goto failed;
-        filename = PyString_FromString(__FILE__);
+        filename = PyUnicode_DecodeFSDefault(__FILE__, 0, NULL);
         handler_info[slot].tb_code =
             PyCode_New(0,		/* argcount */
                        0,       /* kwonlyargcount */
_______________________________________________
Python-bugs-list mailing list 
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to