New submission from Christian Heimes:

This is a *preliminary* patch for __file__ and co_filename which decodes
char* / PyString using the FS default encoding. I'm still working on it.

----------
components: Interpreter Core
files: py3k_file_fsenc.patch
messages: 56345
nosy: tiran
severity: normal
status: open
title: __file__ and co_filename as unicode
versions: Python 3.0

__________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1264>
__________________________________
Index: Python/pythonrun.c
===================================================================
--- Python/pythonrun.c	(revision 58412)
+++ Python/pythonrun.c	(working copy)
@@ -867,7 +867,8 @@
 		return -1;
 	d = PyModule_GetDict(m);
 	if (PyDict_GetItemString(d, "__file__") == NULL) {
-		PyObject *f = PyString_FromString(filename);
+		PyObject *f;
+        f = _PyUnicode_DecodeFSDefault(filename, strlen(filename), NULL);
 		if (f == NULL)
 			return -1;
 		if (PyDict_SetItemString(d, "__file__", f) < 0) {
Index: Python/import.c
===================================================================
--- Python/import.c	(revision 58412)
+++ Python/import.c	(working copy)
@@ -652,9 +652,11 @@
 	/* Remember the filename as the __file__ attribute */
 	v = NULL;
 	if (pathname != NULL) {
-		v = PyString_FromString(pathname);
-		if (v == NULL)
+		v = _PyUnicode_DecodeFSDefault(pathname, strlen(pathname), NULL);
+		if (v == NULL) {
 			PyErr_Clear();
+            printf("__file__ error");
+            }
 	}
 	if (v == NULL) {
 		v = ((PyCodeObject *)co)->co_filename;
@@ -983,7 +985,7 @@
 		PySys_WriteStderr("import %s # directory %s\n",
 			name, pathname);
 	d = PyModule_GetDict(m);
-	file = PyString_FromString(pathname);
+	file = _PyUnicode_DecodeFSDefault(pathname, strlen(pathname), NULL);
 	if (file == NULL)
 		goto error;
 	path = Py_BuildValue("[O]", file);
Index: Python/importdl.c
===================================================================
--- Python/importdl.c	(revision 58412)
+++ Python/importdl.c	(working copy)
@@ -62,7 +62,9 @@
 		return NULL;
 	}
 	/* Remember the filename as the __file__ attribute */
-	if (PyModule_AddStringConstant(m, "__file__", pathname) < 0)
+    PyObject *path;
+    path = _PyUnicode_DecodeFSDefault(pathname, strlen(pathname), NULL);
+	if (PyModule_AddObject(m, "__file__", path) < 0)
 		PyErr_Clear(); /* Not important enough to report */
 
 	if (_PyImport_FixupExtension(name, pathname) == NULL)
Index: Include/unicodeobject.h
===================================================================
--- Include/unicodeobject.h	(revision 58412)
+++ Include/unicodeobject.h	(working copy)
@@ -154,6 +154,7 @@
 # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
 # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
 # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
+# define _PyUnicode_DecodeFSDefault _PyUnicodeUCS2_DecodeFSDefault
 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
 # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
@@ -245,6 +246,7 @@
 # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
 # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
 # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
+# define _PyUnicode_DecodeFSDefault _PyUnicodeUCS4_DecodeFSDefault
 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
 # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
@@ -641,6 +643,20 @@
 PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
     PyObject *, const char *);
 
+/* Decode a string to a Python unicode object using
+   Py_FileSystemDefaultEncoding, or UTF-8 if that encoding is not set.
+
+   *** Exported for internal use by the interpreter only !!! ***
+
+*/
+
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeFSDefault(
+    const char *string, 	/* encoded string */
+    Py_ssize_t length,	 	/* size of string */
+    const char *errors		/* error handling */
+    );
+
+
 /* Return a char* holding the UTF-8 encoded value of the
    Unicode object.
 
Index: Objects/codeobject.c
===================================================================
--- Objects/codeobject.c	(revision 58412)
+++ Objects/codeobject.c	(working copy)
@@ -59,7 +59,7 @@
 	    freevars == NULL || !PyTuple_Check(freevars) ||
 	    cellvars == NULL || !PyTuple_Check(cellvars) ||
 	    name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) ||
-	    filename == NULL || !PyString_Check(filename) ||
+	    filename == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) ||
 	    lnotab == NULL || !PyString_Check(lnotab) ||
 	    !PyObject_CheckReadBuffer(code)) {
 		PyErr_BadInternalCall();
@@ -72,6 +72,20 @@
 	} else {
 		Py_INCREF(name);
 	}
+	if (PyString_Check(filename)) {
+        /* We don't have enough infrastructure here to use the codecs package.
+           What happens if the fs default encoding is not UTF-8?
+
+        filename = PyString_AsDecodedObject(filename,
+                       Py_FileSystemDefaultEncoding ? Py_FileSystemDefaultEncoding : "UTF-8",
+                       NULL);
+        */
+        filename = PyUnicode_FromString(PyString_AS_STRING(filename));
+		if (filename == NULL)
+			return NULL;
+	} else {
+		Py_INCREF(filename);
+	}
 	intern_strings(names);
 	intern_strings(varnames);
 	intern_strings(freevars);
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(revision 58412)
+++ Objects/unicodeobject.c	(working copy)
@@ -1231,6 +1231,22 @@
     return v;
 }
 
+PyObject*
+_PyUnicode_DecodeFSDefault(const char *string,
+                           Py_ssize_t length,
+                           const char *errors)
+{
+    PyObject *v;
+
+    if (Py_FileSystemDefaultEncoding)
+        v = PyUnicode_Decode(string, length, Py_FileSystemDefaultEncoding,
+                             errors);
+    else
+        v = PyUnicode_DecodeUTF8(string, length, errors);
+
+    return (PyObject*)v;
+}
+
 char*
 PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
 {
Index: Objects/moduleobject.c
===================================================================
--- Objects/moduleobject.c	(revision 58412)
+++ Objects/moduleobject.c	(working copy)
@@ -86,12 +86,12 @@
 	d = ((PyModuleObject *)m)->md_dict;
 	if (d == NULL ||
 	    (fileobj = PyDict_GetItemString(d, "__file__")) == NULL ||
-	    !PyString_Check(fileobj))
+	    !PyUnicode_Check(fileobj))
 	{
 		PyErr_SetString(PyExc_SystemError, "module filename missing");
 		return NULL;
 	}
-	return PyString_AsString(fileobj);
+	return PyUnicode_AsString(fileobj);
 }
 
 void
_______________________________________________
Python-bugs-list mailing list 
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to