commit:     e9a590ba0804d0ade9fe842076ab66b7057b8c36
Author:     Kerin Millar <kfm <AT> plushkava <DOT> net>
AuthorDate: Tue Jul  1 21:00:37 2025 +0000
Commit:     Andreas K. Hüttel <dilfridge <AT> gentoo <DOT> org>
CommitDate: Tue Jul  1 21:00:37 2025 +0000
URL:        https://gitweb.gentoo.org/proj/locale-gen.git/commit/?id=e9a590ba

Initial commit of Perl-based locale-gen

Signed-off-by: Kerin Millar <kfm <AT> plushkava.net>
Signed-off-by: Andreas K. Hüttel <dilfridge <AT> gentoo.org>

 locale-gen | 984 ++++++++++++++++++++++++++++++++++---------------------------
 1 file changed, 557 insertions(+), 427 deletions(-)

diff --git a/locale-gen b/locale-gen
index d0e109e..c2e622a 100755
--- a/locale-gen
+++ b/locale-gen
@@ -1,451 +1,581 @@
-#!/bin/bash
+#!/usr/bin/perl
 
+# locale-gen
 #
-# Based upon Debian's locale-gen, fetched from glibc_2.3.6-7.diff.gz, but 
completely rewritten.
-#
+# Generates a glibc locale archive from templates, potentially limiting itself
+# to a set of locales defined by the admin, typically within /etc/locale.gen.
+
+use v5.36;
+
+use Errno qw(ENOENT);
+use Fcntl qw(SEEK_SET);
+use File::Spec::Functions qw(canonpath catfile catdir splitpath);
+use File::Temp qw(tempdir);
+use Getopt::Long ();
+use JSON::PP ();
+use POSIX qw(LC_ALL setlocale);
 
-# NB: Bash-4.0+ required.  We use += and ${var,,} features.
+# Formally stable as of v5.40; sufficiently functional in both v5.36 and v5.38.
+use experimental qw(try);
 
-unset POSIXLY_CORRECT IFS
-umask 0022
+# Determine the basename of the presently compiling script.
+my $PROGRAM;
+BEGIN { $PROGRAM = (splitpath(__FILE__))[-1]; }
 
-argv0=${0##*/}
+my $VERSION = '3.0';
 
-EPREFIX="@GENTOO_PORTAGE_EPREFIX@"
-if [[ ${EPREFIX} == "@"GENTOO_PORTAGE_EPREFIX"@" ]] ; then
-       EPREFIX=""
-fi
+my $DEFERRED_SIGNAL = '';
+my $PID = $$;
+my $TEMPDIR;
 
-FUNCTIONS_SH="/lib/gentoo/functions.sh"
-source "${EPREFIX}"${FUNCTIONS_SH} || {
-       echo "${argv0}: Could not source ${FUNCTIONS_SH}!" 1>&2
-       exit 1
+# For the C locale to be in effect can be a consequence of the user's chosen
+# locale not yet being available. That being the case, unset all environment
+# variables pertaining to locale handling for the benefit of any subprocesses.
+if (setlocale(LC_ALL) eq 'C') {
+       delete @ENV{ grep +( m/^(LANG\z|LC_)/ ), keys %ENV };
 }
 
-COMPILED_LOCALES=""
+# Unset BASH_ENV for security reasons. Even as sh(1), bash acts upon it. Unset
+# CDPATH also, for it is nothing but a liability in a non-interactive context.
+delete @ENV{'BASH_ENV', 'CDPATH'};
+
+{
+       # Determine the locale directory, as reported by localedef(1).
+       my $locale_dir = get_locale_dir();
+
+       # Infer the path of a Gentoo Prefix environment, if any.
+       my $gentoo_prefix = detect_gentoo_prefix($locale_dir);
+       if (length $gentoo_prefix) {
+               $locale_dir =~ s/^\Q$gentoo_prefix//;
+       }
+
+       # Collect any supported options and option-arguments.
+       my %opt = parse_opts($gentoo_prefix, @ARGV);
+       my $prefix = $opt{'prefix'} // $gentoo_prefix;
+
+       # A proxy check is justified because compilation may take a long time.
+       my $archive_dir = catdir($prefix, $locale_dir);
+       if (! utime undef, undef, $archive_dir) {
+               die "$PROGRAM: Aborting because UID $> can't modify 
'$archive_dir': $!\n";
+       }
+
+       # Honour the --quiet option.
+       if ($opt{'quiet'} && ! open *STDOUT, '>/dev/null') {
+               die "Can't direct STDOUT to /dev/null: $!";
+       }
+
+       # Ensure that the C.UTF-8 locale is made available.
+       my @locales = ([ 'C', 'UTF-8', 'C.UTF-8' ]);
+
+       # Compose a list of up to two configuration files to be read.
+       my @config_files;
+       if (exists $opt{'config'}) {
+               push @config_files, $opt{'config'};
+       } else {
+               push @config_files, (
+                       catfile($prefix, '/etc', 'locale.gen'),
+                       catfile($prefix, '/usr/share/i18n', 'SUPPORTED')
+               );
+       }
+
+       # Collect the locales that are being requested for installation.
+       push @locales, read_config($prefix, @config_files);
+
+       # Compose a dictionary of installed locales for the --update option.
+       my %installed_by;
+       if ($opt{'update'}) {
+               # If localedef(1) originates from a Gentoo Prefix environment,
+               # the prefix will already have been hard-coded by the utility.
+               my $explicit_prefix = length $gentoo_prefix ? undef : $prefix;
+               %installed_by = map +( $_ => 1 ), 
list_locales($explicit_prefix);
+       }
+
+       # Filter out locales that are duplicates or that are already installed.
+       my %requested_by;
+       my $i = 0;
+       while ($i <= $#locales) {
+               my $canonical = $locales[$i][2];
+               my $normal = normalize($canonical);
+               if ($requested_by{$normal}++ || $installed_by{$normal}) {
+                       splice @locales, $i, 1;
+               } else {
+                       ++$i;
+               }
+       }
+
+       # If a non-actionable update was requested, proceed no further.
+       if (! @locales) {
+               print "All of the requested locales are presently installed.\n";
+               exit 0;
+       }
+
+       # Create a temporary directory and switch to it.
+       enter_tempdir($prefix);
+
+       # Compile the selected locales.
+       generate_locales($prefix, $opt{'jobs'}, @locales);
+
+       # Integrate the newly compiled locales into the system's locale archive.
+       generate_archive($prefix, $locale_dir, $opt{'update'}, map +( $_->[2] 
), @locales);
+
+       my $total = scalar @locales;
+       printf "Successfully installed %d locale%s.\n", $total, plural($total);
+}
 
-show_usage() {
-       cat <<-EOF
-       Usage: ${HILITE}${argv0}${NORMAL} ${GOOD}[options]${NORMAL} -- 
${GOOD}[localedef options]${NORMAL}
+sub get_locale_dir () {
+       my $stdout = qx{ localedef --help };
+       if ($? == 0 && $stdout =~ m/\hlocale path\h*:\s+(\/[^:]+)/) {
+               return canonpath($1);
+       } else {
+               die "Can't parse locale directory from localedef(1) output: $!";
+       }
+}
 
-       Generate locales based upon the config file /etc/locale.gen.
+sub detect_gentoo_prefix ($path) {
+       if ($path !~ s/\/usr\/lib\/locale\z//) {
+               die "Can't handle unexpected locale directory of '$path'";
+       } elsif (length $path && -e "$path/etc/gentoo-release") {
+               return $path;
+       } else {
+               return '';
+       }
+}
 
-       ${HILITE}Options:${NORMAL}
-           ${GOOD}-k, --keep${NORMAL}               Don't nuke existing locales
-           ${GOOD}-d, --destdir <dir>${NORMAL}      Use locale data in 
specified DESTDIR tree
-           ${GOOD}-c, --config <config>${NORMAL}    Use specified config 
instead of default locale.gen
-           ${GOOD}-l, --list${NORMAL}               List all the locales to be 
generated
-           ${GOOD}-a, --ask${NORMAL}                Ask before generating each 
locale
-           ${GOOD}-A, --all${NORMAL}                Pretend the locale list 
contains all locales
-           ${GOOD}-u, --update${NORMAL}             Only generate locales that 
are missing
-           ${GOOD}-G, --generate <locale>${NORMAL}  Generate specified locale 
(one shot; implies -k -u)
-           ${GOOD}-j, --jobs <num>${NORMAL}         Number of locales to 
generate at a time (parallel)
-           ${GOOD}-q, --quiet${NORMAL}              Only show errors
-           ${GOOD}-V, --version${NORMAL}            Meaningless version 
information
-           ${GOOD}-h, --help${NORMAL}               Show this help cruft
+sub parse_opts ($known_prefix, @args) {
+       my @options = (
+               [ 'config|c=s' => "The file containing the chosen locales 
(default: $known_prefix/etc/locale.gen)" ],
+               [ 'all|A'      => 'Select all locales, ignoring the config 
file' ],
+               [ 'update|u'   => 'Skip any chosen locales that are already 
installed', ],
+               [ 'jobs|j=i'   => 'Maximum number of localedef(1) instances to 
run in parallel' ],
+               [ 'prefix|p=s' => 'The prefix of the root filesystem' ],
+               [ 'quiet|q'    => 'Only show errors' ],
+               [ 'version|V'  => 'Output version information and exit' ],
+               [ 'help|h'     => 'Display this help and exit' ]
+       );
+
+       # Parse the provided arguments.
+       my $parser = Getopt::Long::Parser->new;
+       $parser->configure(qw(posix_default bundling_values no_ignore_case));
+       my %opt;
+       {
+               # Decorate option validation errors while also not permitting
+               # for more than one to be reported.
+               local $SIG{'__WARN__'} = sub ($error) { die "$PROGRAM: $error" 
};
+               $parser->getoptionsfromarray(\@args, \%opt, map +( $_->[0] ), 
@options);
+       }
+
+       # If either --help or --version was specified, exclusively attend to it.
+       if ($opt{'help'}) {
+               show_usage(@options);
+               exit;
+       } elsif ($opt{'version'}) {
+               show_version();
+               exit;
+       }
+
+       # Validate the options and option-arguments.
+       if ($opt{'all'} && exists $opt{'config'}) {
+               die "$PROGRAM: The --all and --config options are mutually 
exclusive\n";
+       } elsif (exists $opt{'prefix'} && length $known_prefix && ! 
is_eq_file($known_prefix, $opt{'prefix'})) {
+               die "$PROGRAM: The --prefix option specifies a path contrary to 
a detected Gentoo Prefix\n";
+       }
+
+       # Assign values for unspecified options that need them.
+       if (! exists $opt{'jobs'} || $opt{'jobs'} < 1) {
+               $opt{'jobs'} = get_nprocs() || 1;
+       }
+       if ($opt{'all'}) {
+               $opt{'config'} = catfile($opt{'prefix'} // $known_prefix, 
'/usr/share/i18n', 'SUPPORTED');
+       } elsif (exists $opt{'config'} && $opt{'config'} eq '-') {
+               $opt{'config'} = '/dev/stdin';
+       }
+
+       return %opt;
+}
 
-       ${HILITE}Localedef Options:${NORMAL}
-           By default, ${GOOD}${LOCALEDEF_OPTS}${NORMAL} is passed to 
localedef.
+sub show_usage (@options) {
+       print "Usage: locale-gen [OPTION]...\n\n";
+       my $pipe;
+       if (! open $pipe, "| column -t -s \037") {
+               exit 1;
+       }
+       for my $row (@options) {
+               my ($spec, $description) = $row->@*;
+               my ($long, $short) = split /[|=]/, $spec;
+               printf {$pipe} "-%s, --%s\037%s\n", $short, $long, $description;
+       }
+}
 
-       For more info, see the ${HILITE}locale-gen${NORMAL}(1) and 
${HILITE}locale.gen${NORMAL}(8) manpages.
+sub show_version () {
+       print <<~EOF;
+       locale-gen $VERSION
+       Copyright 2024 Kerin Millar <kfm\@plushkava.net>
+       License GPL-2.0-only 
<https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html>
        EOF
-       [[ $# -eq 0 ]] && exit 0
-       echo ""
-       eerror "Unknown option '$1'"
-       exit 1
 }
 
-show_version() {
-       echo "locale-gen-2.xxx"
-       exit 0
+sub list_locales ($prefix) {
+       if (! defined(my $pid = open my $pipe, '-|')) {
+               die "Can't fork: $!";
+       } elsif ($pid == 0) {
+               run_localedef($prefix, '--list-archive');
+       } else {
+               chomp(my @locales = readline $pipe);
+               if (-1 == waitpid($pid, 0) || $? != 0) {
+                       die "$PROGRAM: Can't obtain a list of the presently 
installed locales\n";
+               }
+               return @locales;
+       }
 }
 
-LOCALEDEF_OPTS=""
-KEEP=""
-DESTDIR=""
-CONFIG=""
-JUST_LIST=""
-ASK=""
-ALL=""
-UPDATE=""
-GENERATE=""
-JOBS_MAX=""
-QUIET=0
-SET_X=""
-LOCALE_ARCHIVE=true
-INPLACE_GLIBC=""
-while [[ $# -gt 0 ]] ; do
-       case $1 in
-               --inplace-glibc)           INPLACE_GLIBC=$1;;
-               -k|--keep|--keep-existing) KEEP=$1;;
-               -d|--destdir)              shift; DESTDIR=$1; unset ROOT;;
-               -c|--config)               shift; CONFIG=$1;;
-               -l|--list)                 JUST_LIST=$1;;
-               -a|--ask)                  ASK=$1;;
-               -A|--all)                  ALL=$1;;
-               -u|--update)               UPDATE=$1;;
-               -G|--generate)             shift; GENERATE=$1;;
-               -j|--jobs)                 shift; JOBS_MAX=$(( $1 ));;
-               -j*)                       : $(( JOBS_MAX = ${1#-j} ));;
-               -q|--quiet)                : $(( ++QUIET ));;
-               -x|--debug)                SET_X="true";;
-               -V|--version)              show_version;;
-               -h|--help)                 show_usage;;
-               --)                        shift; LOCALEDEF_OPTS=$*; break;;
-               *)                         show_usage $1;;
-       esac
-       shift
-done
-
-if [[ -n ${COMPILED_LOCALES} ]] ; then
-       ewarn "All locales have been installed and registered by the package 
manager. If you"
-       ewarn "rebuild the locale archive now, file integrity tools may show it 
as corrupted."
-       ewarn "This is not really a big problem, but a better solution is to 
disable"
-       ewarn "USE=compile-locales and re-install glibc if you dont need all 
locales."
-       echo
-fi
-
-if [[ -z ${JOBS_MAX} ]] ; then
-       JOBS_MAX=$(getconf _NPROCESSORS_ONLN 2>/dev/null)
-       : "${JOBS_MAX:=1}"
-fi
-[[ ${JOBS_MAX} -lt 1 ]] && JOBS_MAX=1
-[[ -n ${SET_X} ]] && set -x
-: "${KEEP:=${JUST_LIST}}"
-[[ -n ${GENERATE} ]] && UPDATE="true" && KEEP="true"
-
-: "${ROOT:=/}"
-ROOT="${ROOT%/}/"
-
-if [[ ${ROOT} != "/" ]] ; then
-       eerror "Sorry, but ROOT is not supported."
-       exit 0
-fi
-
-: "${EROOT:=${EPREFIX}/}"
-if [[ ${EROOT} != "/" ]] ; then
-       einfo "Using locale.gen from ${EROOT%/}/etc/"
-fi
-
-if [[ -n ${DESTDIR} ]] ; then
-       DESTDIR="${DESTDIR%/}/"
-       einfo "Building locales in DESTDIR '${DESTDIR}'"
-else
-       DESTDIR="${EROOT%/}/"
-fi
-
-: "${CONFIG:=${EROOT%/}/etc/locale.gen}"
-LOCALES=${DESTDIR}usr/share/i18n/locales
-CHARMAPS=${DESTDIR}usr/share/i18n/charmaps
-SUPPORTED=${DESTDIR}usr/share/i18n/SUPPORTED
-ALIAS=${DESTDIR}usr/share/locale/locale.alias
+sub normalize ($canonical) {
+       if (2 == (my ($locale, $charmap) = split /\./, $canonical, 3)) {
+               # en_US.UTF-8 => en_US.utf8; en_US.ISO-8859-1 => en_US.iso88591
+               return join '.', $locale, lc($charmap =~ s/-//gr);
+       } else {
+               die "Can't normalize " . render_printable($canonical);
+       }
+}
 
-#
-# Grab any user options in their config file
-options=$(sed -n \
-       -e '/^[[:space:]]*#%/s:^[[:space:]]*#%[[:space:]]*::p'\
-       "${CONFIG}" 2>/dev/null
-)
-IFS=$'\n'
-for option in ${options} ; do
-       case ${option} in
-               no-locale-archive)
-                       LOCALE_ARCHIVE=false
-                       ;;
-               *)
-                       ewarn "Unrecognized option '${option}'"
-                       ;;
-       esac
-done
-unset IFS
-
-[[ -n ${ALL} ]] && CONFIG=${SUPPORTED}
-
-# Extract the location of the locale dir on the fly as `localedef --help` has:
-#                        locale path    : /usr/lib64/locale:/usr/share/i18n
-# For long paths, the line may get wrapped into two, in which case space (' ') 
is replaced
-# by newline (\n).
-LOCALEDIR=$(LC_ALL="C" "${DESTDIR}"usr/bin/localedef --help | sed -n -r 
'/locale path/{N;s|.*:[ \n](.*):/.*|\1|;p}')
-LOCALEDIR="${DESTDIR}${LOCALEDIR#${EPREFIX}}"
-if [[ $? -ne 0 ]] || [[ -z ${LOCALEDIR} ]] || [[ ${LOCALEDIR} != 
${DESTDIR}/usr/lib*/locale ]] ; then
-       eerror "Unable to parse the output of your localedef utility." 1>&2
-       eerror "File a bug about this issue and include the output of 
'localedef --help'." 1>&2
-       exit 1
-fi
-
-# Only generate locales the user specified before falling back to the config.
-locales_to_generate=${GENERATE}
-
-if [[ -z ${locales_to_generate} ]] && [[ -e ${CONFIG} ]] ; then
-       locales_to_generate=$(sed \
-               -e 's:#.*::' \
-               -e '/^[[:space:]]*$/d' \
-               "${CONFIG}" | sort)
-       # Sanity check to make sure people did not duplicate entries. #550884
-       # The first column must be unique specifically. #235555
-       dup_locales_to_generate=$(
-               echo "${locales_to_generate}" | \
-                       awk '{ if ($1 == last) { print lastline; print; } else 
{ lastline = $0; last = $1; } }')
-       if [[ -n ${dup_locales_to_generate} ]] ; then
-               ewarn "These locales have been duplicated in your 
config:\n${dup_locales_to_generate}"
-               ewarn "Some might be filtered, but you must fix it."
-               locales_to_generate=$(echo "${locales_to_generate}" | uniq)
-       fi
-fi
-
-# Transform the name in locales.gen to the name used when storing the locale 
data in
-# /usr/lib/locale/.  This normalize algo is taken out of the glibc localedef 
source:
-# 
https://sourceware.org/git/?p=glibc.git;a=blob;f=locale/programs/localedef.c;hb=glibc-2.34#l562
-normalize() {
-       if [[ $1 == *.* ]] ; then
-               local ret=${1##*.}
-               ret=${ret,,}
-               echo "${1%%.*}.${ret//-}"
-       else
-               echo "$1"
-       fi
+sub read_config ($prefix, @paths) {
+       # Compose a dictionary of locale names known to be valid.
+       my %locale_by = map +( $_ => 1 ), get_valid_locales($prefix);
+
+       # Compose a dictionary of character maps known to be valid.
+       my %charmap_by = map +( $_ => 1 ), get_valid_charmaps($prefix);
+
+       # Iterate over the given paths and return the first non-empty list of
+       # valid locale declarations that can be found among them, if any.
+       for my $i (keys @paths) {
+               my $path = $paths[$i];
+               try {
+                       my $fh = fopen($path);
+                       $! = 0;
+                       if (my @locales = parse_config($fh, $path, \%locale_by, 
\%charmap_by)) {
+                               return @locales;
+                       }
+               } catch ($e) {
+                       # Disregard open errors concerning non-existent files
+                       # unless there are no more paths to be tried. Validation
+                       # errors shall also be propagated here.
+                       if ($! != ENOENT || $i == $#paths) {
+                               die $e;
+                       }
+               }
+       }
+
+       # For no locales to have been discovered at this point is exceptional.
+       my $path_list = render_printable(scalar @paths == 1 ? $paths[0] : 
\@paths);
+       die "$PROGRAM: No locale declarations were found within $path_list\n";
+}
+
+sub get_valid_locales ($prefix) {
+       my $top = local $ENV{'TOP'} = catdir($prefix, 
'/usr/share/i18n/locales');
+       my @paths = qx{ cd -- "\$TOP" && find . ! -path . -prune ! -path '*\n*' 
-type f -exec grep -lxF LC_IDENTIFICATION {} + };
+       if ($? != 0 || ! @paths) {
+               die "$PROGRAM: Failed to compose a list of valid locale names 
from '$top'\n";
+       }
+       chomp @paths;
+       return map +( (splitpath($_))[-1] ), @paths;
+}
+
+sub get_valid_charmaps ($prefix) {
+       my $top = catdir($prefix, '/usr/share/i18n/charmaps');
+       if (! opendir my $dh, $top) {
+               die "$PROGRAM: Can't open '$top' for reading: $!\n";
+       } elsif (! (my @names = map +( -f "$top/$_" ? s/\.gz\z//r : () ), 
readdir $dh)) {
+               die "$PROGRAM: Failed to compose a list of valid character maps 
from '$top'\n";
+       } else {
+               return @names;
+       }
+}
+
+sub parse_config ($fh, $path, $locale_by, $charmap_by) {
+       # Set up a helper routine to throw for validation errors.
+       my $thrower = sub ($error, $line) {
+               die sprintf "%s: %s at %s[%d]: %s\n",
+                       $PROGRAM, $error, $path, $., render_printable($line);
+       };
+
+       my @locales;
+       while (my $line = readline $fh) {
+               # Skip comments and blank lines. Note that \h will match only " 
" and
+               # "\t", since the input stream is not being subjected to any 
decoding.
+               next if $line =~ m/^\h*($|#)/;
+
+               # Expect for two fields, separated by horizontal whitespace.
+               my @fields;
+               chomp $line;
+               if (2 != (@fields = split /\h+/, trim_line($line), 3)) {
+                       $thrower->('Malformed locale declaration', $line);
+               }
+
+               # Extract the specified locale and character map. Upon success,
+               # a canonicalised representation of the locale is also returned.
+               my ($locale, $charmap, $canonical) = parse_entry(@fields);
+
+               # Validate both locale and character map before accepting.
+               if (! $locale_by->{$locale}) {
+                       $thrower->('Invalid locale', $line);
+               } elsif (! $charmap_by->{$charmap}) {
+                       $thrower->('Invalid/mismatching charmap', $line);
+               } else {
+                       push @locales, [ $locale, $charmap, $canonical ];
+               }
+       }
+
+       return @locales;
+}
+
+sub parse_entry ($locale, $charmap) {
+       my $canonical;
+       if (2 == (my @fields = split /@/, $locale, 3)) {
+               # de_DE@euro ISO-8859-15 => de_DE.ISO-8859-15@euro
+               $canonical = sprintf '%s.%s@%s', $fields[0], $charmap, 
$fields[1];
+       } elsif (2 == (@fields = split /\./, $locale, 3)) {
+               # en_US.UTF-8 UTF-8 => en_US.UTF-8
+               $locale = $fields[0];
+               $canonical = "$locale.$charmap";
+               if ($fields[1] ne $charmap) {
+                       $charmap = '';
+               }
+       } elsif (1 == @fields) {
+               # en_US ISO-8859-1 => en_US.ISO-8859-1
+               $canonical = "$locale.$charmap";
+       }
+       return $locale, $charmap, $canonical;
+}
+
+sub enter_tempdir ($prefix) {
+       # Given that /tmp might be a tmpfs, prefer /var/tmp so as to avoid
+       # undue memory pressure.
+       my $dir = catdir($prefix, '/var/tmp');
+       if (! -d $dir) {
+               $dir = File::Spec->tmpdir;
+       }
+       $TEMPDIR = tempdir('locale-gen.XXXXXXXXXX', 'DIR' => $dir);
+       if (! chdir $TEMPDIR) {
+               die "$PROGRAM: Can't chdir to '$TEMPDIR': $!\n";
+       }
+}
+
+sub generate_locales ($prefix, $workers, @locales) {
+       # Trap SIGINT and SIGTERM so that they may be handled gracefully.
+       my $handler = sub ($signal) { $DEFERRED_SIGNAL ||= $signal };
+       local @SIG{'INT', 'TERM'} = ($handler, $handler);
+
+       my $total = scalar @locales;
+       printf "Compiling %d locale definition file%s with %d worker%s ...\n",
+               $total, plural($total), $workers, plural($workers);
+
+       my $num_width = length $total;
+       my %status_by;
+       for my $i (keys @locales) {
+               # Ensure that the number of concurrent workers is bounded.
+               if ($i >= $workers) {
+                       my $pid = wait;
+                       last if 0 != ($status_by{$pid} = $?);
+               }
+
+               my ($locale, $charmap, $canonical) = $locales[$i]->@*;
+               printf "[%*d/%d] Compiling locale: %s\n",
+                       $num_width, $i + 1, $total, $canonical;
+
+               # Fork and execute localedef(1) for locale compilation.
+               if (! defined(my $pid = fork)) {
+                       warn "Can't fork: $!";
+                       last;
+               } elsif ($pid == 0) {
+                       @SIG{'INT', 'TERM'} = ('DEFAULT', 'DEFAULT');
+                       compile_locale($locale, $charmap, $canonical);
+               }
+       } continue {
+               last if $DEFERRED_SIGNAL;
+       }
+
+       # Reap any subprocesses that remain.
+       if ($workers > 1) {
+               print "Waiting for active workers to finish their jobs ...\n";
+       }
+       while (-1 != (my $pid = wait)) {
+               $status_by{$pid} = $?;
+       }
+
+       # Abort if any of the collected status codes are found to be non-zero.
+       # In the case that one subprocess was interrupted by a signal while
+       # another exited non-zero, the resulting diagnostic shall allude to the
+       # signal. Such determinism is achieved by sorting the values.
+       for my $status (sort { $a <=> $b } values %status_by) {
+               throw_child_error('localedef', $status);
+       }
+
+       if ($DEFERRED_SIGNAL) {
+               # The signal shall be propagated by the END block.
+               exit;
+       } elsif (scalar %status_by != $total) {
+               die "$PROGRAM: Aborting because not all of the selected locales 
were compiled\n";
+       }
 }
 
-# These funky sed's are based on the stuff in glibc's localedata/Makefile
-# Basically we want to rewrite the display like so:
-# <locale without a . or @>.<charmap>[@extra stuff after the @ in the locale]
-# en_US       ISO-8859-1  -> en_US.ISO-8859-1
-# en_US.UTF-8 UTF-8       -> en_US.UTF-8
-# de_DE@euro  ISO-8859-15 -> de_DE.ISO-8859-15@euro
-locales_disp=$(echo "${locales_to_generate}" | sed \
-       -e '   /@/   
s:[[:space:]]*\([^@[:space:]]*\)\([^[:space:]]*\)[[:space:]]\+\([^[:space:]]*\):\1.\3\2:'
 \
-       -e 
'/^[^@]*$/s:[[:space:]]*\([^.[:space:]]*\)\([^[:space:]]*\)[[:space:]]\+\([^[:space:]]*\):\1.\3:')
-
-# Now check the normalized version for C.UTF-8, and add it if not present
-if [[ -z ${locales_to_generate} ]] ; then
-       if [[ -z ${JUST_LIST} ]] ; then
-               [[ ${QUIET} -eq 0 ]] && \
-                       ewarn "No locales to generate found, keeping archive 
but ensuring C.UTF-8 is present"
-               KEEP=1
-               UPDATE=1
-               locales_disp='C.UTF-8'
-               locales_to_generate='C.UTF-8 UTF-8'
-       fi
-else
-       if echo ${locales_disp} | grep -vqi 'C.UTF-8' ; then
-               locales_to_generate=$(echo "${locales_to_generate}" ; echo -n 
'C.UTF-8 UTF-8')
-               locales_disp=$(echo "${locales_disp}" ; echo -n 'C.UTF-8')
-       fi
-fi
-
-mkdir -p "${LOCALEDIR}"
-if [[ -z ${KEEP} && -z ${UPDATE} ]] ; then
-       # Remove all old locale dir and locale-archive before generating new
-       # locale data.  Scrubbing via update is done elsewhere.
-       rm -rf "${LOCALEDIR}"/* &> /dev/null || true
-fi
-
-eval declare -a locales_disp=(${locales_disp})
-eval declare -a locales_to_generate=(${locales_to_generate})
-total=$(( ${#locales_to_generate[*]} / 2 ))
-
-[[ ${QUIET} -eq 0 ]] && [[ -z ${JUST_LIST} ]] && \
-einfo "Generating ${total} locales (this might take a while) with ${JOBS_MAX} 
jobs"
-
-if [[ -n ${UPDATE} ]] ; then
-       # normalize newlines into spaces
-       existing_locales=" $(echo $(locale -a 2>/dev/null)) "
-fi
-
-generate_locale() {
-       local output=""
-
-       if [[ -z ${ASK} ]] && [[ ${QUIET} -eq 0 ]] ; then
-               output=" (${cnt_fmt}/${total}) Generating ${disp}"
-       fi
-
-       if [[ $(( JOB_IDX_E - JOB_IDX_S )) == ${JOBS_MAX} ]] ; then
-               wait ${JOB_PIDS[$(( JOB_IDX_S++ ))]}
-               JOB_RETS+=( $? )
-       fi
-       (
-               # Accumulate all the output in one go so the parallel
-               # jobs don't tromp on each other
-               x=$(
-                       [[ -n ${output} ]] && ebegin "${output}"
-                       # In most cases, localedef can just use the system 
glibc.
-                       # However, if we are within a major glibc upgrade, this 
may fail
-                       # in src_* phases since the new localedef links against 
the new
-                       # glibc, but the new glibc is not installed yet...
-                       if [[ -z ${INPLACE_GLIBC} ]] ; then
-                               "${DESTDIR}"usr/bin/localedef ${LOCALEDEF_OPTS} 
\
-                                       --no-archive \
-                                       -i "${input}" \
-                                       -f "${charmap}" \
-                                       -A "${ALIAS}" \
-                                       --prefix "${DESTDIR%${EPREFIX}/}/" \
-                                       "${locale}" 2>&1
-                       else
-                               # We assume that the current directory is 
"${ED}"/$(get_libdir),
-                               # see the glibc ebuild, function 
glibc_sanity_check(), for why.
-                               LC_ALL=C ./ld-*.so --library-path . \
-                                       "${DESTDIR}"usr/bin/localedef 
${LOCALEDEF_OPTS} \
-                                       --no-archive \
-                                       -i "${input}" \
-                                       -f "${charmap}" \
-                                       -A "${ALIAS}" \
-                                       --prefix "${DESTDIR%${EPREFIX}/}/" \
-                                       "${locale}" 2>&1
-                       fi
-                       ret=$?
-                       [[ -n ${output} ]] && eend ${ret}
-                       exit ${ret}
-               )
-               ret=$?
-               if [[ -n ${output} ]] ; then
-                       echo "${x}"
-               elif [[ ${ret} -ne 0 ]] ; then
-                       eerror "${disp}: ${x}"
-               fi
-
-               if [[ ${ret} -ne 0 && ${locale} == */* ]] ; then
-                       ewarn "Perhaps you meant to use a space instead of a / 
in your config file ?"
-               fi
-               exit ${ret}
-       ) &
-       JOB_PIDS+=( $! )
-       : $(( ++JOB_IDX_E ))
+sub compile_locale ($locale, $charmap, $canonical) {
+       my $output_dir = "./$canonical";
+       my @args = ('--no-archive', '-i', $locale, '-f', $charmap, '--', 
$output_dir);
+       run_localedef(undef, @args);
+}
+       
+sub generate_archive ($prefix, $locale_dir, $do_update, @canonicals) {
+       # Create the temporary subdir that will contain the new locale archive.
+       my $output_dir = catdir('.', $prefix, $locale_dir);
+       run('mkdir', '-p', '--', $output_dir);
+
+       # Determine the eventual destination path of the archive.
+       my $final_path = catfile($prefix, $locale_dir, 'locale-archive');
+       printf "The location of the archive shall be %s.\n", 
render_printable($final_path);
+
+       # If --update was specified, make a copy of the existing archive.
+       if ($do_update && -e $final_path) {
+               run('cp', '--', $final_path, "$output_dir/");
+       }
+
+       # Integrate all of the compiled locales into the new locale archive.
+       my $total = scalar @canonicals;
+       printf "Adding %d locale%s to the locale archive ...\n", $total, 
plural($total);
+       my $stderr = fopen('stderr.log', '+>');
+       redirect_stderr($stderr, sub {
+               my @args = ('--quiet', '--add-to-archive', '--replace', '--', 
@canonicals);
+               run_localedef('.', @args);
+       });
+
+       # Propagate the diagnostics and errors raised by localedef(1), if any.
+       seek $stderr, 0, SEEK_SET;
+       my $i = 0;
+       while (my $line = readline $stderr) {
+               warn $line;
+               ++$i;
+       }
+       close $stderr;
+
+       # Check the status code first.
+       throw_child_error('localedef');
+
+       # Sadly, the exit status of GNU localedef(1) is nigh on useless in the
+       # case that the --add-to-archive option is provided. If anything was
+       # printed to STDERR at all, act as if the utility had exited 1.
+       if ($i > 0) {
+               throw_child_error('localedef', 1 << 8);
+       }
+
+       # The process of replacing the old archive must not be interrupted.
+       local @SIG{'INT', 'TERM'} = ('IGNORE', 'IGNORE');
+
+       # Move the newly minted archive into the appropriate filesystem. Use
+       # mv(1), since there is a chance of crossing a filesystem boundary.
+       my $interim_path = "$final_path.$$";
+       run('mv', '--', catfile($output_dir, 'locale-archive'), $interim_path);
+
+       # Atomically replace the old archive.
+       if (! rename $interim_path, $final_path) {
+               {
+                       local $!;
+                       unlink $interim_path;
+               }
+               die "$PROGRAM: Can't rename '$interim_path' to '$final_path': 
$!\n";
+       }
+}
+
+sub run_localedef ($prefix, @args) {
+       # Incorporate the --prefix option, if requested.
+       if (length $prefix) {
+               unshift @args, '--prefix', $prefix;
+       }
+
+       # Prevent the --verbose option from being potentially implied.
+       delete local $ENV{'POSIXLY_CORRECT'};
+
+       # Execute localedef(1). Don't fork if doing so from a child process.
+       my @cmd = ('localedef', @args);
+       if ($$ == $PID) {
+               system @cmd;
+       } elsif (! exec @cmd) {
+               exit 1;
+       }
+}
+
+sub fopen ($path, $mode = '<') {
+       if (! open my $fh, $mode, $path) {
+               die "$PROGRAM: Can't open '$path': $!\n";
+       } elsif (! -f $fh && canonpath($path) !~ m/^\/dev\/(null|stdin)\z/) {
+               die "$PROGRAM: Won't open '$path' because it is not a regular 
file\n";
+       } else {
+               return $fh;
+       }
+}
+
+sub get_nprocs () {
+       chomp(my $nproc = qx{ { nproc || getconf _NPROCESSORS_CONF; } 
2>/dev/null });
+       return $nproc;
+}
+
+sub is_eq_file ($path1, $path2) {
+       # The -ef primary is standard as of POSIX.1-2024.
+       local @ENV{'PATH1', 'PATH2'} = ($path1, $path2);
+       return 0 == system q{ test "$PATH1" -ef "$PATH2" };
+}
+
+sub plural ($int) {
+       return $int == 1 ? '' : 's';
 }
 
-JOB_PIDS=()
-JOB_RETS=()
-JOB_IDX_S=0
-JOB_IDX_E=0
-cnt=0
-lidx=0
-# Keep track of (normalized) locales generated in case the request has 
different inputs that
-# normalize down to the same value.  We trim $existing_locales as we go for 
later use which
-# prevents its direct use.
-generated_locales=" "
-while [[ -n ${locales_to_generate[${lidx}]} ]] ; do
-       : $(( ++cnt ))
-       locale=${locales_to_generate[$((lidx++))]}
-       charmap=${locales_to_generate[$((lidx++))]}
-
-       # XXX: if we wanted to, we could check existence of
-       #      ${LOCALES}/${locale} and ${CHARMAPS}/${charmap}
-       #      this would fail for things like "en_US.UTF8", but
-       #      in that case we could fall back to checking the
-       #      SUPPORTED file ... then again, the localedef
-       #      below will abort nicely for us ...
-       if [[ -z ${locale} || -z ${charmap} ]] ; then
-               eerror "Bad entry in locale.gen: '${locale} ${charmap}'; 
skipping"
-               continue
-       fi
-
-       disp=${locales_disp[$(( cnt - 1 ))]}
-
-       normalized_locale=$(normalize ${locale})
-       if [[ ${generated_locales} == *" ${normalized_locale} "* ]] ; then
-               already_generated="true"
-       else
-               already_generated="false"
-       fi
-       generated_locales+="${normalized_locale} "
-       if ${already_generated} || \
-          [[ -n ${UPDATE} && ${existing_locales} == *" ${normalized_locale} "* 
]] ; then
-               existing_locales=${existing_locales/ ${normalized_locale} / }
-               if [[ ${QUIET} -eq 0 ]] ; then
-                       cnt_fmt=$(printf "%${#total}i" ${cnt})
-                       einfo " (${cnt_fmt}/${total}) Skipping ${disp}"
-               fi
-               continue
-       fi
-
-       # If the locale is like 'en_US.UTF8', then we really want 'en_US'
-       if [[ -f ${LOCALES}/${locale} ]] ; then
-               input=${locale}
-       else
-               input=${locale%%.*}
-       fi
-
-       if [[ -z ${JUST_LIST} ]] ; then
-               # Format the output for the question/status
-               cnt_fmt=$(printf "%${#total}i" ${cnt})
-               if [[ -n ${ASK} ]] ; then
-                       einfon " (${cnt_fmt}/${total}) Generate ${disp} ? (Y/n) 
"
-                       read user_answer
-                       [[ ${user_answer} == [nN]* ]] && continue
-               fi
-               generate_locale
-       else
-               echo "${disp}"
-       fi
-done
-
-for (( i = JOB_IDX_S; i < JOB_IDX_E; ++i )) ; do
-       wait ${JOB_PIDS[i]}
-       JOB_RETS+=( $? )
-done
-ret=$(( 0 ${JOB_RETS[@]/#/+} ))
-
-[[ ${QUIET} -eq 0 ]] && [[ -z ${JUST_LIST} ]] && \
-einfo "Generation complete"
-
-if ${LOCALE_ARCHIVE} && [[ -z ${JUST_LIST} ]] ; then
-       # need to check that at least one locale has to be added
-       if [[ $(echo "${LOCALEDIR}"/*/) != "${LOCALEDIR}"'/*/' ]] ; then
-               [[ ${QUIET} -eq 0 ]] && ebegin "Adding locales to archive"
-               # The pattern ends with / on purpose: we don't care about files 
(like
-               # locale-archive) in the locale subdir, and we definitely don't 
want to
-               # delete them!
-               for LOC in "${LOCALEDIR}"/*/; do
-                       LOC=${LOC%/} # Strip trailing /, since localedef 
doesn't like it
-                       x=$(
-                               # In most cases, localedef can just use the 
system glibc.
-                               # However, if we are within a major glibc 
upgrade, this may fail
-                               # in src_* phases since the new localedef links 
against the new
-                               # glibc, but the new glibc is not installed 
yet...
-                               if [[ -z ${INPLACE_GLIBC} ]] ; then
-                                       "${DESTDIR}"usr/bin/localedef \
-                                               --add-to-archive "${LOC}" \
-                                               --replace \
-                                               --prefix 
"${DESTDIR%${EPREFIX}/}/"
-                               else
-                                       # We assume that the current directory 
is "${ED}"/$(get_libdir),
-                                       # see the glibc ebuild, function 
glibc_sanity_check(), for why.
-                                       LC_ALL=C ./ld-*.so --library-path . \
-                                               "${DESTDIR}"usr/bin/localedef \
-                                               --add-to-archive "${LOC}" \
-                                               --replace \
-                                               --prefix 
"${DESTDIR%${EPREFIX}/}/"
-                               fi
-                               ret=$?
-                               if [[ -n ${output} ]] ; then
-                                       echo "${x}"
-                               elif [[ ${ret} -ne 0 ]] ; then
-                                       eerror "${disp}: ${x}"
-                               fi
-                               if [[ $ret -eq 0 ]]; then
-                                       rm -r "${LOC}"
-                               fi
-                               exit ${ret}
-                       )
-               done
-               [[ ${QUIET} -eq 0 ]] && eend ${ret}
-       elif [[ ${QUIET} -eq 0 ]] ; then
-               einfo "No locales are to be added to the archive."
-       fi
-fi
-
-# Remove locales that existed but were not requested
-if [[ -n ${UPDATE} ]] && [[ -z ${JUST_LIST} ]] ; then
-       # Ignore these pseudo locales
-       existing_locales=${existing_locales/ C / }
-       existing_locales=${existing_locales/ POSIX / }
-       if [[ -n ${existing_locales// } ]] ; then
-               if [[ -z ${KEEP} ]] ; then
-                       [[ ${QUIET} -eq 0 ]] && einfo "Scrubbing old 
locales:"${existing_locales}
-                       cd "${LOCALEDIR}" && rm -rf ${existing_locales}
-               else
-                       [[ ${QUIET} -eq 0 ]] && einfo "Keeping old 
locales:"${existing_locales}
-               fi
-       fi
-fi
-
-exit ${ret}
+sub redirect_stderr ($stderr, $callback) {
+       if (! open my $old_stderr, '>&', *STDERR) {
+               die "Can't dup STDERR to a new file descriptor: $!";
+       } elsif (! open *STDERR, '>&', $stderr) {
+               my $fileno = fileno $stderr;
+               die "Can't dup file descriptor #$fileno to STDERR: $!";
+       } else {
+               $callback->();
+               open *STDERR, '>&=', $old_stderr;
+       }
+}
+
+sub render_printable ($value) {
+       my $coder = JSON::PP->new->ascii->space_after;
+       return $coder->encode($value);
+}
+
+sub run ($cmd, @args) {
+       system $cmd, @args;
+       throw_child_error($cmd);
+}
+
+sub throw_child_error ($cmd, $status = $?) {
+       if ($status == -1) {
+               # The program could not be started. Since Perl will already
+               # have printed a warning, no supplemental diagnostic is needed.
+               exit 1;
+       } elsif ($status != 0) {
+               my $printable_cmd = render_printable($cmd);
+               my $fate = ($status & 0x7F) ? 'interrupted by a signal' : 
'unsuccessful';
+               die "$PROGRAM: Aborting because the execution of $printable_cmd 
was $fate\n";
+       }
+}
+
+sub trim_line ($line) {
+       return $line =~ s/^\h+|\h+$//gr;
+}
+
+END {
+       if ($$ == $PID) {
+               if (length $TEMPDIR) {
+                       local $?;
+                       system 'rm', '-r', '--', $TEMPDIR;
+               }
+
+               # The default SIGINT and SIGTERM handlers are suppressed by
+               # generate_locales. The former is especially important, per
+               # http://www.cons.org/cracauer/sigint.html.
+               if ($DEFERRED_SIGNAL) {
+                       kill $DEFERRED_SIGNAL, $$;
+               }
+       }
+}

Reply via email to