https://github.com/python/cpython/commit/c359fcd2f50d02e4709e9ca3175c1ba1ea6dc7ef
commit: c359fcd2f50d02e4709e9ca3175c1ba1ea6dc7ef
branch: main
author: Hizuru <[email protected]>
committer: vstinner <[email protected]>
date: 2025-02-21T14:51:13+01:00
summary:

gh-129569: The function unicodedata.normalize() always returns built-in str (#129570)

Co-authored-by: Victor Stinner <[email protected]>

files:
A Misc/NEWS.d/next/Library/2025-02-02-16-30-27.gh-issue-129569.i0kPOG.rst
M Lib/test/test_unicodedata.py
M Modules/unicodedata.c

diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 0285f0d51f2365..8e3fef6b6fe4a0 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -467,6 +467,29 @@ def test_bug_834676(self):
         # Check for bug 834676
         unicodedata.normalize('NFC', '\ud55c\uae00')
 
+    def test_normalize_return_type(self):
+        # gh-129569: normalize() return type must always be str
+        normalize = unicodedata.normalize
+
+        class MyStr(str):
+            pass
+
+        normalization_forms = ("NFC", "NFKC", "NFD", "NFKD")
+        input_strings = (
+            # normalized strings
+            "",
+            "ascii",
+            # unnormalized strings
+            "\u1e0b\u0323",
+            "\u0071\u0307\u0323",
+        )
+
+        for form in normalization_forms:
+            for input_str in input_strings:
+                with self.subTest(form=form, input_str=input_str):
+                    self.assertIs(type(normalize(form, input_str)), str)
+                    self.assertIs(type(normalize(form, MyStr(input_str))), str)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2025-02-02-16-30-27.gh-issue-129569.i0kPOG.rst b/Misc/NEWS.d/next/Library/2025-02-02-16-30-27.gh-issue-129569.i0kPOG.rst
new file mode 100644
index 00000000000000..c4b8965106aa56
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-02-02-16-30-27.gh-issue-129569.i0kPOG.rst
@@ -0,0 +1 @@
+Fix :func:`unicodedata.normalize` to always return a built-in :class:`str` object when given an input of a :class:`str` subclass, regardless of whether the string is already normalized.
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 60bde755d24574..79be7674fc8ab5 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -933,34 +933,34 @@ unicodedata_UCD_normalize_impl(PyObject *self, PyObject *form,
     if (PyUnicode_GET_LENGTH(input) == 0) {
         /* Special case empty input strings, since resizing
            them  later would cause internal errors. */
-        return Py_NewRef(input);
+        return PyUnicode_FromObject(input);
     }
 
     if (PyUnicode_CompareWithASCIIString(form, "NFC") == 0) {
         if (is_normalized_quickcheck(self, input,
                                      true,  false, true) == YES) {
-            return Py_NewRef(input);
+            return PyUnicode_FromObject(input);
         }
         return nfc_nfkc(self, input, 0);
     }
     if (PyUnicode_CompareWithASCIIString(form, "NFKC") == 0) {
         if (is_normalized_quickcheck(self, input,
                                      true,  true,  true) == YES) {
-            return Py_NewRef(input);
+            return PyUnicode_FromObject(input);
         }
         return nfc_nfkc(self, input, 1);
     }
     if (PyUnicode_CompareWithASCIIString(form, "NFD") == 0) {
         if (is_normalized_quickcheck(self, input,
                                      false, false, true) == YES) {
-            return Py_NewRef(input);
+            return PyUnicode_FromObject(input);
         }
         return nfd_nfkd(self, input, 0);
     }
     if (PyUnicode_CompareWithASCIIString(form, "NFKD") == 0) {
         if (is_normalized_quickcheck(self, input,
                                      false, true,  true) == YES) {
-            return Py_NewRef(input);
+            return PyUnicode_FromObject(input);
         }
         return nfd_nfkd(self, input, 1);
     }

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to