commit: cf37b8be253e4e4a3f78976ecac4056439933761 Author: Kerin Millar <kfm <AT> plushkava <DOT> net> AuthorDate: Thu Sep 18 22:39:36 2025 +0000 Commit: Kerin Millar <kfm <AT> plushkava <DOT> net> CommitDate: Thu Sep 18 23:03:27 2025 +0000 URL: https://gitweb.gentoo.org/proj/locale-gen.git/commit/?id=cf37b8be
Tolerate a codeset/charmap of "UTF8" (for now) It has been observed that some Gentoo installations have locale.gen(5) files containing "C.UTF8 UTF-8" as an entry. Further, up until recently, the catalyst tool would populate /etc/locale.gen in that way. Though glibc tolerates "UTF8" as a codeset, locale-gen(8) does not, because no file by that name resides in the /usr/share/i18n/charmaps directory. For now, have the parse_config() subroutine handle "UTF8" as a special case for both the locale and charmap fields. Where "UTF8" is seen, it shall now be taken as "UTF-8", with a rather ostentatious warning being issued. The intention is to ease the transition to >=locale-gen-3 in advance of stable-keywording, while encouraging users to amend their config files before eventually dropping support for this edge case. Bug: https://bugs.gentoo.org/963046 Link: https://gitweb.gentoo.org/proj/catalyst.git/commit/?id=99af83b914 Signed-off-by: Kerin Millar <kfm <AT> plushkava.net> locale-gen | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/locale-gen b/locale-gen index c18858f..0299f14 100644 --- a/locale-gen +++ b/locale-gen @@ -13,6 +13,7 @@ use File::Spec::Functions qw(canonpath catfile catdir path splitpath); use File::Temp qw(tempdir); use Getopt::Long (); use List::Util qw(any); +use Term::ANSIColor qw(colored); # Formally stable as of v5.40; sufficiently functional in both v5.36 and v5.38. use experimental qw(try); @@ -360,6 +361,31 @@ sub parse_config ($fh, $path, $locale_by, $charmap_by) { $thrower->('Malformed locale declaration', $line); } + # Handle "UTF8" as a special case. Though glibc tolerates it, + # locale-gen would otherwise not because there is no charmap + # file by that name. The intention is to encourage users to + # amend their config files before eventually dropping support + # for "UTF8" altogether. + my @warnings; + if ($fields[0] =~ s/\.UTF\K8(?=@|\z)/-8/) { + push @warnings, + sprintf "WARNING! Codeset specified as UTF8 (should be UTF-8) at %s[%d]: %s", + $path, $., render_printable($line); + } + if ($fields[1] =~ s/^UTF8\z/UTF-8/) { + push @warnings, + sprintf "WARNING! Charmap specified as UTF8 (should be UTF-8) at %s[%d]: %s", + $path, $., render_printable($line); + } + for my $warning (@warnings) { + if (-t 2) { + *STDOUT->flush; + warn colored($warning, 'bold yellow') . "\n"; + } else { + warn "$warning\n"; + } + } + # Extract the specified locale and character map. Upon success, # a canonicalised representation of the locale is also returned. my ($locale, $codeset, $charmap, $canonical) = parse_entry(@fields);
