commit:     cf37b8be253e4e4a3f78976ecac4056439933761
Author:     Kerin Millar <kfm <AT> plushkava <DOT> net>
AuthorDate: Thu Sep 18 22:39:36 2025 +0000
Commit:     Kerin Millar <kfm <AT> plushkava <DOT> net>
CommitDate: Thu Sep 18 23:03:27 2025 +0000
URL:        https://gitweb.gentoo.org/proj/locale-gen.git/commit/?id=cf37b8be

Tolerate a codeset/charmap of "UTF8" (for now)

It has been observed that some Gentoo installations have locale.gen(5)
files containing "C.UTF8 UTF-8" as an entry. Further, up until recently,
the catalyst tool would populate /etc/locale.gen in that way. Though
glibc tolerates "UTF8" as a codeset, locale-gen(8) does not, because no
file by that name resides in the /usr/share/i18n/charmaps directory.

For now, have the parse_config() subroutine handle "UTF8" as a special
case for both the locale and charmap fields. Where "UTF8" is seen, it
shall now be taken as "UTF-8", with a rather ostentatious warning being
issued. The intention is to ease the transition to >=locale-gen-3 in
advance of stable-keywording, while encouraging users to amend their
config files before eventually dropping support for this edge case.

Bug: https://bugs.gentoo.org/963046
Link: https://gitweb.gentoo.org/proj/catalyst.git/commit/?id=99af83b914
Signed-off-by: Kerin Millar <kfm <AT> plushkava.net>

 locale-gen | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/locale-gen b/locale-gen
index c18858f..0299f14 100644
--- a/locale-gen
+++ b/locale-gen
@@ -13,6 +13,7 @@ use File::Spec::Functions qw(canonpath catfile catdir path splitpath);
 use File::Temp qw(tempdir);
 use Getopt::Long ();
 use List::Util qw(any);
+use Term::ANSIColor qw(colored);
 
 # Formally stable as of v5.40; sufficiently functional in both v5.36 and v5.38.
 use experimental qw(try);
@@ -360,6 +361,31 @@ sub parse_config ($fh, $path, $locale_by, $charmap_by) {
                        $thrower->('Malformed locale declaration', $line);
                }
 
+               # Handle "UTF8" as a special case. Though glibc tolerates it,
+               # locale-gen would otherwise not because there is no charmap
+               # file by that name. The intention is to encourage users to
+               # amend their config files before eventually dropping support
+               # for "UTF8" altogether.
+               my @warnings;
+               if ($fields[0] =~ s/\.UTF\K8(?=@|\z)/-8/) {
+                       push @warnings,
+                               sprintf "WARNING! Codeset specified as UTF8 (should be UTF-8) at %s[%d]: %s",
+                                       $path, $., render_printable($line);
+               }
+               if ($fields[1] =~ s/^UTF8\z/UTF-8/) {
+                       push @warnings,
+                               sprintf "WARNING! Charmap specified as UTF8 
(should be UTF-8) at %s[%d]: %s",
+                                       $path, $., render_printable($line);
+               }
+               for my $warning (@warnings) {
+                       if (-t 2) {
+                               *STDOUT->flush;
+                               warn colored($warning, 'bold yellow') . "\n";
+                       } else {
+                               warn "$warning\n";
+                       }
+               }
+
                # Extract the specified locale and character map. Upon success,
                # a canonicalised representation of the locale is also returned.
                my ($locale, $codeset, $charmap, $canonical) = parse_entry(@fields);

Reply via email to