https://github.com/python/cpython/commit/0c8fecc4cfc41e82307a089c474dc69b009bdbcc
commit: 0c8fecc4cfc41e82307a089c474dc69b009bdbcc
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2025-08-18T10:11:15+03:00
summary:
gh-137729: Fix support for locales with @-modifiers (GH-137253)
files:
A Misc/NEWS.d/next/Library/2025-08-14-00-00-12.gh-issue-137729.i9NSKP.rst
M Doc/library/locale.rst
M Doc/whatsnew/3.15.rst
M Lib/locale.py
M Lib/test/test_locale.py
M Tools/i18n/makelocalealias.py
diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst
index d48ea04077f366..0800b3e5677c93 100644
--- a/Doc/library/locale.rst
+++ b/Doc/library/locale.rst
@@ -42,7 +42,7 @@ The :mod:`locale` module defines the following exception and
functions:
If *locale* is a pair, it is converted to a locale name using
the locale aliasing engine.
The language code has the same format as a :ref:`locale name <locale_name>`,
- but without encoding and ``@``-modifier.
+ but without encoding.
The language code and encoding can be ``None``.
If *locale* is omitted or ``None``, the current setting for *category* is
@@ -58,6 +58,9 @@ The :mod:`locale` module defines the following exception and
functions:
specified in the :envvar:`LANG` environment variable). If the locale is not
changed thereafter, using multithreading should not cause problems.
+ .. versionchanged:: next
+ Support language codes with ``@``-modifiers.
+
.. function:: localeconv()
@@ -366,11 +369,15 @@ The :mod:`locale` module defines the following exception
and functions:
values except :const:`LC_ALL`. It defaults to :const:`LC_CTYPE`.
The language code has the same format as a :ref:`locale name <locale_name>`,
- but without encoding and ``@``-modifier.
+ but without encoding.
The language code and encoding may be ``None`` if their values cannot be
determined.
The "C" locale is represented as ``(None, None)``.
+ .. versionchanged:: next
+ ``@``-modifiers are no longer silently removed, but included in
+ the language code.
+
.. function:: getpreferredencoding(do_setlocale=True)
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
index 252d8966b7450f..407606da961c16 100644
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -274,6 +274,15 @@ http.cookies
(Contributed by Nick Burns and Senthil Kumaran in :gh:`92936`.)
+locale
+------
+
+* :func:`~locale.setlocale` now supports language codes with ``@``-modifiers.
+ ``@``-modifiers are no longer silently removed in :func:`~locale.getlocale`,
+ but included in the language code.
+ (Contributed by Serhiy Storchaka in :gh:`137729`.)
+
+
math
----
diff --git a/Lib/locale.py b/Lib/locale.py
index 0bde7ed51c66c1..37cafb4a601b3c 100644
--- a/Lib/locale.py
+++ b/Lib/locale.py
@@ -375,12 +375,14 @@ def _replace_encoding(code, encoding):
def _append_modifier(code, modifier):
if modifier == 'euro':
if '.' not in code:
- return code + '.ISO8859-15'
+ # Linux appears to require keeping the "@euro" modifier in place,
+ # even when using the ".ISO8859-15" encoding.
+ return code + '.ISO8859-15@euro'
_, _, encoding = code.partition('.')
- if encoding in ('ISO8859-15', 'UTF-8'):
+ if encoding == 'UTF-8':
return code
if encoding == 'ISO8859-1':
- return _replace_encoding(code, 'ISO8859-15')
+ code = _replace_encoding(code, 'ISO8859-15')
return code + '@' + modifier
def normalize(localename):
@@ -485,13 +487,18 @@ def _parse_localename(localename):
# Deal with locale modifiers
code, modifier = code.split('@', 1)
if modifier == 'euro' and '.' not in code:
- # Assume Latin-9 for @euro locales. This is bogus,
- # since some systems may use other encodings for these
- # locales. Also, we ignore other modifiers.
- return code, 'iso-8859-15'
+ # Assume ISO8859-15 for @euro locales. Do note that some systems
+ # may use other encodings for these locales, so this may not always
+ # be correct.
+ return code + '@euro', 'ISO8859-15'
+ else:
+ modifier = ''
if '.' in code:
- return tuple(code.split('.')[:2])
+ code, encoding = code.split('.')[:2]
+ if modifier:
+ code += '@' + modifier
+ return code, encoding
elif code == 'C':
return None, None
elif code == 'UTF-8':
@@ -516,7 +523,14 @@ def _build_localename(localetuple):
if encoding is None:
return language
else:
- return language + '.' + encoding
+ if '@' in language:
+ language, modifier = language.split('@', 1)
+ else:
+ modifier = ''
+ localename = language + '.' + encoding
+ if modifier:
+ localename += '@' + modifier
+ return localename
except (TypeError, ValueError):
raise TypeError('Locale must be None, a string, or an iterable of '
'two strings -- language code, encoding.') from None
@@ -888,6 +902,12 @@ def getpreferredencoding(do_setlocale=True):
# SS 2025-06-10:
# Remove 'c.utf8' -> 'en_US.UTF-8' because 'en_US.UTF-8' does not exist
# on all platforms.
+#
+# SS 2025-07-30:
+# Remove conflicts with GNU libc.
+#
+# removed 'el_gr@euro'
+# removed 'uz_uz@cyrillic'
locale_alias = {
'a3': 'az_AZ.KOI8-C',
@@ -1021,7 +1041,6 @@ def getpreferredencoding(do_setlocale=True):
'el': 'el_GR.ISO8859-7',
'el_cy': 'el_CY.ISO8859-7',
'el_gr': 'el_GR.ISO8859-7',
- 'el_gr@euro': 'el_GR.ISO8859-15',
'en': 'en_US.ISO8859-1',
'en_ag': 'en_AG.UTF-8',
'en_au': 'en_AU.ISO8859-1',
@@ -1456,7 +1475,6 @@ def getpreferredencoding(do_setlocale=True):
'ur_pk': 'ur_PK.CP1256',
'uz': 'uz_UZ.UTF-8',
'uz_uz': 'uz_UZ.UTF-8',
- 'uz_uz@cyrillic': 'uz_UZ.UTF-8',
've': 've_ZA.UTF-8',
've_za': 've_ZA.UTF-8',
'vi': 'vi_VN.TCVN',
diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py
index 698e137e3e8abd..01b1e754d04219 100644
--- a/Lib/test/test_locale.py
+++ b/Lib/test/test_locale.py
@@ -1,4 +1,5 @@
from decimal import Decimal
+from test import support
from test.support import cpython_only, verbose, is_android, linked_to_musl, os_helper
from test.support.warnings_helper import check_warnings
from test.support.import_helper import ensure_lazy_imports, import_fresh_module
@@ -425,8 +426,8 @@ def test_hyphenated_encoding(self):
self.check('cs_CZ.ISO8859-2', 'cs_CZ.ISO8859-2')
def test_euro_modifier(self):
- self.check('de_DE@euro', 'de_DE.ISO8859-15')
- self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15')
+ self.check('de_DE@euro', 'de_DE.ISO8859-15@euro')
+ self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15@euro')
self.check('de_DE.utf8@euro', 'de_DE.UTF-8')
def test_latin_modifier(self):
@@ -534,6 +535,105 @@ def test_setlocale_long_encoding(self):
with self.assertRaises(locale.Error):
locale.setlocale(locale.LC_ALL, loc2)
+ @support.subTests('localename,localetuple', [
+ ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso885915')),
+ ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso88591')),
+ ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')),
+ ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-1')),
+ ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', None)),
+ ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso885915')),
+ ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso88591')),
+ ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')),
+ ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-1')),
+ ('de_DE.ISO8859-15@euro', ('de_DE@euro', None)),
+ ('el_GR.ISO8859-7@euro', ('el_GR@euro', 'iso88597')),
+ ('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')),
+ ('el_GR.ISO8859-7@euro', ('el_GR@euro', None)),
+ ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso885915')),
+ ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso88591')),
+ ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')),
+ ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-1')),
+ ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', None)),
+ ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'utf8')),
+ ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')),
+ ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', None)),
+ ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'utf8')),
+ ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
+ ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', None)),
+ ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'utf8')),
+ ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
+ ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', None)),
+ ('be_BY.UTF-8@latin', ('be_BY@latin', 'utf8')),
+ ('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')),
+ ('be_BY.UTF-8@latin', ('be_BY@latin', None)),
+ ('sr_RS.UTF-8@latin', ('sr_RS@latin', 'utf8')),
+ ('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')),
+ ('sr_RS.UTF-8@latin', ('sr_RS@latin', None)),
+ ('ug_CN.UTF-8@latin', ('ug_CN@latin', 'utf8')),
+ ('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')),
+ ('ug_CN.UTF-8@latin', ('ug_CN@latin', None)),
+ ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'utf8')),
+ ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
+ ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', None)),
+ ])
+ def test_setlocale_with_modifier(self, localename, localetuple):
+ try:
+ locale.setlocale(locale.LC_CTYPE, localename)
+ except locale.Error as exc:
+ self.skipTest(str(exc))
+ loc = locale.setlocale(locale.LC_CTYPE, localetuple)
+ self.assertEqual(loc, localename)
+
+ loctuple = locale.getlocale(locale.LC_CTYPE)
+ loc = locale.setlocale(locale.LC_CTYPE, loctuple)
+ self.assertEqual(loc, localename)
+
+ @support.subTests('localename,localetuple', [
+ ('fr_FR.iso885915@euro', ('fr_FR@euro', 'ISO8859-15')),
+ ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')),
+ ('fr_FR@euro', ('fr_FR@euro', 'ISO8859-15')),
+ ('de_DE.iso885915@euro', ('de_DE@euro', 'ISO8859-15')),
+ ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')),
+ ('de_DE@euro', ('de_DE@euro', 'ISO8859-15')),
+ ('el_GR.iso88597@euro', ('el_GR@euro', 'ISO8859-7')),
+ ('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')),
+ ('el_GR@euro', ('el_GR@euro', 'ISO8859-7')),
+ ('ca_ES.iso885915@euro', ('ca_ES@euro', 'ISO8859-15')),
+ ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')),
+ ('ca_ES@euro', ('ca_ES@euro', 'ISO8859-15')),
+ ('ca_ES.utf8@valencia', ('ca_ES@valencia', 'UTF-8')),
+ ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')),
+ ('ca_ES@valencia', ('ca_ES@valencia', 'UTF-8')),
+ ('ks_IN.utf8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
+ ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
+ ('ks_IN@devanagari', ('ks_IN@devanagari', 'UTF-8')),
+ ('sd_IN.utf8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
+ ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
+ ('sd_IN@devanagari', ('sd_IN@devanagari', 'UTF-8')),
+ ('be_BY.utf8@latin', ('be_BY@latin', 'UTF-8')),
+ ('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')),
+ ('be_BY@latin', ('be_BY@latin', 'UTF-8')),
+ ('sr_RS.utf8@latin', ('sr_RS@latin', 'UTF-8')),
+ ('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')),
+ ('sr_RS@latin', ('sr_RS@latin', 'UTF-8')),
+ ('ug_CN.utf8@latin', ('ug_CN@latin', 'UTF-8')),
+ ('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')),
+ ('ug_CN@latin', ('ug_CN@latin', 'UTF-8')),
+ ('uz_UZ.utf8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
+ ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
+ ('uz_UZ@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
+ ])
+ def test_getlocale_with_modifier(self, localename, localetuple):
+ try:
+ locale.setlocale(locale.LC_CTYPE, localename)
+ except locale.Error as exc:
+ self.skipTest(str(exc))
+ loctuple = locale.getlocale(locale.LC_CTYPE)
+ self.assertEqual(loctuple, localetuple)
+
+ locale.setlocale(locale.LC_CTYPE, loctuple)
+ self.assertEqual(locale.getlocale(locale.LC_CTYPE), localetuple)
+
class TestMiscellaneous(unittest.TestCase):
def test_defaults_UTF8(self):
diff --git a/Misc/NEWS.d/next/Library/2025-08-14-00-00-12.gh-issue-137729.i9NSKP.rst b/Misc/NEWS.d/next/Library/2025-08-14-00-00-12.gh-issue-137729.i9NSKP.rst
new file mode 100644
index 00000000000000..b324a42c7f869e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-08-14-00-00-12.gh-issue-137729.i9NSKP.rst
@@ -0,0 +1,3 @@
+:func:`locale.setlocale` now supports language codes with ``@``-modifiers.
+``@``-modifiers are no longer silently removed in :func:`locale.getlocale`,
+but included in the language code.
diff --git a/Tools/i18n/makelocalealias.py b/Tools/i18n/makelocalealias.py
index 02af1caff7d499..7f001abc09745d 100755
--- a/Tools/i18n/makelocalealias.py
+++ b/Tools/i18n/makelocalealias.py
@@ -44,6 +44,13 @@ def parse(filename):
# Ignore one letter locale mappings (except for 'c')
if len(locale) == 1 and locale != 'c':
continue
+ if '@' in locale and '@' not in alias:
+ # Do not simply remove the "@euro" modifier.
+ # Glibc generates separate locales with the "@euro" modifier, and
+ # does not always generate a locale without it with the same encoding.
+ # It can also affect collation.
+ if locale.endswith('@euro') and not locale.endswith('.utf-8@euro'):
+ alias += '@euro'
# Normalize encoding, if given
if '.' in locale:
lang, encoding = locale.split('.')[:2]
@@ -51,6 +58,10 @@ def parse(filename):
encoding = encoding.replace('_', '')
locale = lang + '.' + encoding
data[locale] = alias
+ # Conflict with glibc.
+ data.pop('el_gr@euro', None)
+ data.pop('uz_uz@cyrillic', None)
+ data.pop('uz_uz.utf8@cyrillic', None)
return data
def parse_glibc_supported(filename):
@@ -81,7 +92,7 @@ def parse_glibc_supported(filename):
# Add an encoding to alias
alias, _, modifier = alias.partition('@')
alias = _locale._replace_encoding(alias, alias_encoding)
- if modifier and not (modifier == 'euro' and alias_encoding == 'ISO-8859-15'):
+ if modifier:
alias += '@' + modifier
data[locale] = alias
return data
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]