commit:     0193ee11e3aa99775c546ac63346ec05c01a51f7
Author:     Kerin Millar <kfm <AT> plushkava <DOT> net>
AuthorDate: Thu May  1 23:49:57 2025 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Fri May  2 06:06:27 2025 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=0193ee11

app-shells/bash: add 5.2_p37-r1 with several fixes

Backport two supplemental patches from the devel branch so as to address
some remaining defects affecting the revised read_mbchar() function.
For further information, please refer to pitfall #65 at
https://mywiki.wooledge.org/BashPitfalls, along with the commit messages
of the patches themselves. The read builtin should now behave as well as
it did in 4.4, and as well as it will in the upcoming 5.3-rc2 release.

Whitelist the Contour and Ghostty terminal emulators for colour support.

In the case that colour support is detected, refrain from defining any
aliases for which either an alias or function is found already to exist
by the same name. The motivation for making this change stems from a
discussion at https://forums.gentoo.org/viewtopic-t-1170323.html.

Define COLORTERM=1 in the environment of dircolors(1). The methods by
which dircolors determines whether colour is supported are inferior to
those of Gentoo. Indeed, at the point that dircolors is executed, it
will already have been determined that colour is supported with some
confidence. Defining COLORTERM=1 coerces dircolors into presuming so in
turn, increasing the range of terminal emulators for which Gentoo is
able to define LS_COLORS.

Define _cwd as a local variable in genfun_set_win_title(). Consequently,
_cwd is no longer a reserved variable name from the perspective of the
user.

Whitelist the Contour terminal emulator for XTWINOPS support.

Signed-off-by: Kerin Millar <kfm <AT> plushkava.net>
Signed-off-by: Sam James <sam <AT> gentoo.org>

 app-shells/bash/bash-5.2_p37-r1.ebuild             | 408 +++++++++++++++++++++
 ...32-erroneous-delimiter-pushback-condition.patch |  76 ++++
 ...-continuation-byte-ignored-as-delimiter-1.patch | 286 +++++++++++++++
 ...-continuation-byte-ignored-as-delimiter-2.patch | 146 ++++++++
 .../bash/files/bashrc.d/10-gentoo-color-r1.bash    |  73 ++++
 .../bash/files/bashrc.d/10-gentoo-title-r2.bash    |  83 +++++
 6 files changed, 1072 insertions(+)

diff --git a/app-shells/bash/bash-5.2_p37-r1.ebuild b/app-shells/bash/bash-5.2_p37-r1.ebuild
new file mode 100644
index 000000000000..b2a3e2144af2
--- /dev/null
+++ b/app-shells/bash/bash-5.2_p37-r1.ebuild
@@ -0,0 +1,408 @@
+# Copyright 1999-2025 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+VERIFY_SIG_OPENPGP_KEY_PATH=/usr/share/openpgp-keys/chetramey.asc
+inherit flag-o-matic toolchain-funcs prefix verify-sig
+
+# Uncomment if we have a patchset.
+#GENTOO_PATCH_DEV="sam"
+#GENTOO_PATCH_VER="${PV}"
+
+MY_PV=${PV/_p*}
+MY_PV=${MY_PV/_/-}
+MY_P=${PN}-${MY_PV}
+MY_PATCHES=()
+
+# Determine the patchlevel. See ftp://ftp.gnu.org/gnu/bash/bash-5.2-patches/.
+case ${PV} in
+       *_p*)
+               PLEVEL=${PV##*_p}
+               ;;
+       9999|*_alpha*|*_beta*|*_rc*)
+               # Set a negative patchlevel to indicate that it's a pre-release.
+               PLEVEL=-1
+               ;;
+       *)
+               PLEVEL=0
+esac
+
+# The version of readline this bash normally ships with. Note that we only use
+# the bundled copy of readline for pre-releases.
+READLINE_VER="8.2_p1"
+
+# Package metadata. HOMEPAGE is a whitespace-separated list of URLs.
+DESCRIPTION="The standard GNU Bourne again shell"
+HOMEPAGE="https://tiswww.case.edu/php/chet/bash/bashtop.html https://git.savannah.gnu.org/cgit/bash.git"
+
+# Assemble SRC_URI. The live ebuild fetches the devel branch by way of git-r3.
+# Otherwise, the base tarball is fetched along with one official patch for
+# each patchlevel up to PLEVEL, with the paths of the patches being recorded
+# in MY_PATCHES. Detached signatures are fetched where USE=verify-sig.
+if [[ ${PV} == 9999 ]]; then
+       EGIT_REPO_URI="https://git.savannah.gnu.org/git/bash.git"
+       EGIT_BRANCH=devel
+       inherit git-r3
+else
+       my_urls=( {'mirror://gnu/bash','ftp://ftp.cwru.edu/pub/bash'}/"${MY_P}.tar.gz" )
+
+       # bash-5.1 -> bash51
+       my_p=${PN}$(ver_cut 1-2) my_p=${my_p/.}
+
+       for (( my_patch_idx = 1; my_patch_idx <= PLEVEL; my_patch_idx++ )); do
+               printf -v my_patch_ver %s-%03d "${my_p}" "${my_patch_idx}"
+               my_urls+=( {'mirror://gnu/bash','ftp://ftp.cwru.edu/pub/bash'}/"${MY_P}-patches/${my_patch_ver}" )
+               MY_PATCHES+=( "${DISTDIR}/${my_patch_ver}" )
+       done
+
+       # The second expansion appends ".sig" to every URL.
+       SRC_URI="${my_urls[*]} verify-sig? ( ${my_urls[*]/%/.sig} )"
+
+       unset -v my_urls my_p my_patch_idx my_patch_ver
+fi
+
+# Fetch the Gentoo patchset also, provided that one has been declared. The
+# :? expansions abort if either variable is unset or empty at this point.
+if [[ ${GENTOO_PATCH_VER} ]]; then
+       SRC_URI+=" https://dev.gentoo.org/~${GENTOO_PATCH_DEV:?}/distfiles/${CATEGORY}/${PN}/${PN}-${GENTOO_PATCH_VER:?}-patches.tar.xz"
+fi
+
+S=${WORKDIR}/${MY_P}
+
+LICENSE="GPL-3+"
+SLOT="0"
+# Versions with a negative PLEVEL (live and pre-releases) are not keyworded.
+if (( PLEVEL >= 0 )); then
+       KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~hppa ~loong ~m68k ~mips ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86 ~amd64-linux ~x86-linux ~arm64-macos ~ppc-macos ~x64-macos ~x64-solaris"
+fi
+IUSE="afs bashlogger examples mem-scramble +net nls plugins pgo +readline"
+
+DEPEND="
+       >=sys-libs/ncurses-5.2-r2:=
+       nls? ( virtual/libintl )
+"
+if (( PLEVEL >= 0 )); then
+       DEPEND+=" readline? ( >=sys-libs/readline-${READLINE_VER}:= )"
+fi
+RDEPEND="
+       ${DEPEND}
+"
+# We only need bison (yacc) when the .y files get patched (bash42-005, bash51-011).
+BDEPEND="
+       pgo? ( dev-util/gperf )
+       verify-sig? ( sec-keys/openpgp-keys-chetramey )
+"
+
+# EAPI 8 tries to append it but it doesn't exist here.
+QA_CONFIGURE_OPTIONS="--disable-static"
+
+PATCHES=(
+       #"${WORKDIR}"/${PN}-${GENTOO_PATCH_VER}/
+
+       # Patches to or from Chet, posted to the bug-bash mailing list.
+       "${FILESDIR}/${PN}-5.0-syslog-history-extern.patch"
+       "${FILESDIR}/${PN}-5.2_p15-random-ub.patch"
+       "${FILESDIR}/${PN}-5.2_p15-configure-clang16.patch"
+       "${FILESDIR}/${PN}-5.2_p21-wpointer-to-int.patch"
+       "${FILESDIR}/${PN}-5.2_p32-memory-leaks.patch"
+       
"${FILESDIR}/${PN}-5.2_p32-invalid-continuation-byte-ignored-as-delimiter-1.patch"
+       
"${FILESDIR}/${PN}-5.2_p32-invalid-continuation-byte-ignored-as-delimiter-2.patch"
+       "${FILESDIR}/${PN}-5.2_p32-erroneous-delimiter-pushback-condition.patch"
+)
+
+# Abort if CFLAGS contains -malign-double and warn those that have enabled
+# USE=bashlogger as to its implications.
+pkg_setup() {
+       # bug #7332
+       if is-flag -malign-double; then
+               eerror "Detected bad CFLAGS '-malign-double'.  Do not use this"
+               eerror "as it breaks LFS (struct stat64) on x86."
+               die "remove -malign-double from your CFLAGS mr ricer"
+       fi
+
+       if use bashlogger; then
+               ewarn "The logging patch should ONLY be used in restricted (i.e. honeypot) envs."
+               ewarn "This will log ALL output you enter into the shell, you have been warned."
+       fi
+}
+
+# Unpack the sources. For the live ebuild, defer to git-r3. Otherwise, verify
+# the detached signatures of the tarball and of each official patch where
+# USE=verify-sig, then unpack the tarball and, if declared, the Gentoo
+# patchset. The official patches are not unpacked; MY_PATCHES refers to them
+# by their DISTDIR paths.
+src_unpack() {
+       local patch
+
+       if [[ ${PV} == 9999 ]]; then
+               git-r3_src_unpack
+       else
+               if use verify-sig; then
+                       # The brace expansion yields the file, followed by its .sig.
+                       verify-sig_verify_detached "${DISTDIR}/${MY_P}.tar.gz"{,.sig}
+
+                       for patch in "${MY_PATCHES[@]}"; do
+                               verify-sig_verify_detached "${patch}"{,.sig}
+                       done
+               fi
+
+               unpack "${MY_P}.tar.gz"
+
+               if [[ ${GENTOO_PATCH_VER} ]]; then
+                       unpack "${PN}-${GENTOO_PATCH_VER}-patches.tar.xz"
+               fi
+       fi
+}
+
+# Apply the official patches, strip the bundled readline and termcap sources
+# for releases, adjust the build system, then apply PATCHES and user patches.
+src_prepare() {
+       # Include official patches.
+       (( PLEVEL > 0 )) && eapply -p0 "${MY_PATCHES[@]}"
+
+       # Clean out local libs so we know we use system ones w/releases. The
+       # touch utility is invoked for the benefit of config.status.
+       if (( PLEVEL >= 0 )); then
+               rm -rf lib/{readline,termcap}/* \
+               && touch lib/{readline,termcap}/Makefile.in \
+               && sed -i -E 's:\$[{(](RL|HIST)_LIBSRC[)}]/[[:alpha:]_-]*\.h::g' Makefile.in \
+               || die
+       fi
+
+       # Prefixify hardcoded path names. No-op for non-prefix.
+       hprefixify pathnames.h.in
+
+       # Avoid regenerating docs after patches, bug #407985.
+       sed -i -E '/^(HS|RL)USER/s:=.*:=:' doc/Makefile.in \
+       && touch -r . doc/* \
+       || die
+
+       # Sometimes hangs (more noticeable w/ pgo), bug #907403.
+       rm tests/run-jobs || die
+
+       eapply -p0 "${PATCHES[@]}"
+       eapply_user
+}
+
+# Compose the configure arguments and preprocessor definitions in accordance
+# with the USE flags and PLEVEL, then run econf.
+src_configure() {
+       local -a myconf
+
+       # Upstream only test with Bison and require GNUisms like YYEOF and
+       # YYERRCODE. The former at least may be in POSIX soon:
+       # https://www.austingroupbugs.net/view.php?id=1269.
+       # configure warns on use of non-Bison but doesn't abort. The result
+       # may misbehave at runtime.
+       unset -v YACC
+
+       # bash 5.3 drops unprototyped functions, earlier versions are
+       # incompatible with C23.
+       append-cflags $(test-flags-CC -std=gnu17)
+
+       myconf=(
+               --disable-profiling
+
+               # Force linking with system curses ... the bundled termcap lib
+               # sucks bad compared to ncurses.  For the most part, ncurses
+               # is here because readline needs it.  But bash itself calls
+               # ncurses in one or two small places :(.
+               --with-curses
+
+               $(use_enable mem-scramble)
+               $(use_enable net net-redirections)
+               $(use_enable readline)
+               $(use_enable readline bang-history)
+               $(use_enable readline history)
+               $(use_with afs)
+               $(use_with mem-scramble bash-malloc)
+       )
+
+       # For descriptions of these, see config-top.h.
+       # bashrc/#26952 bash_logout/#90488 ssh/#24762 mktemp/#574426
+       append-cppflags \
+               -DDEFAULT_PATH_VALUE=\'\""${EPREFIX}"/usr/local/sbin:"${EPREFIX}"/usr/local/bin:"${EPREFIX}"/usr/sbin:"${EPREFIX}"/usr/bin:"${EPREFIX}"/sbin:"${EPREFIX}"/bin\"\' \
+               -DSTANDARD_UTILS_PATH=\'\""${EPREFIX}"/bin:"${EPREFIX}"/usr/bin:"${EPREFIX}"/sbin:"${EPREFIX}"/usr/sbin\"\' \
+               -DSYS_BASHRC=\'\""${EPREFIX}"/etc/bash/bashrc\"\' \
+               -DSYS_BASH_LOGOUT=\'\""${EPREFIX}"/etc/bash/bash_logout\"\' \
+               -DNON_INTERACTIVE_LOGIN_SHELLS \
+               -DSSH_SOURCE_BASHRC \
+               $(use bashlogger && echo -DSYSLOG_HISTORY)
+
+       use nls || myconf+=( --disable-nls )
+
+       if (( PLEVEL >= 0 )); then
+               # Historically, we always used the builtin readline, but since
+               # our handling of SONAME upgrades has gotten much more stable
+               # in the PM (and the readline ebuild itself preserves the old
+               # libs during upgrades), linking against the system copy should
+               # be safe.
+               # Exact cached version here doesn't really matter as long as it
+               # is at least what's in the DEPEND up above.
+               export ac_cv_rl_version=${READLINE_VER%%_*}
+
+               # Use system readline only with released versions.
+               myconf+=( --with-installed-readline=. )
+       fi
+
+       if use plugins; then
+               append-ldflags "-Wl,-rpath,${EPREFIX}/usr/$(get_libdir)/bash"
+       else
+               # Disable the plugins logic by hand since bash doesn't provide
+               # a way of doing it.
+               export ac_cv_func_dl{close,open,sym}=no \
+                       ac_cv_lib_dl_dlopen=no ac_cv_header_dlfcn_h=no
+
+               sed -i -e '/LOCAL_LDFLAGS=/s:-rdynamic::' configure || die
+       fi
+
+       # bug #444070
+       tc-export AR
+
+       econf "${myconf[@]}"
+}
+
+# Build bash and, where USE=plugins, the loadable builtins. Where USE=pgo,
+# the first build is instrumented, the test suite is run so as to generate
+# profiles, and everything is then rebuilt using the collected profile data.
+src_compile() {
+       local -a pgo_generate_flags pgo_use_flags
+       local flag
+
+       # -fprofile-partial-training because upstream notes the test suite isn't
+       # super comprehensive.
+       # https://documentation.suse.com/sbp/all/html/SBP-GCC-10/index.html#sec-gcc10-pgo
+       if use pgo; then
+               pgo_generate_flags=(
+                       -fprofile-update=atomic
+                       -fprofile-dir="${T}"/pgo
+                       -fprofile-generate="${T}"/pgo
+               )
+               pgo_use_flags=(
+                       -fprofile-use="${T}"/pgo
+                       -fprofile-dir="${T}"/pgo
+               )
+               if flag=$(test-flags-CC -fprofile-partial-training); then
+                       pgo_generate_flags+=( "${flag}" )
+                       pgo_use_flags+=( "${flag}" )
+               fi
+       fi
+
+       # Without USE=pgo, pgo_generate_flags is empty and so this amounts to
+       # an ordinary build.
+       emake CFLAGS="${CFLAGS} ${pgo_generate_flags[*]}"
+       use plugins && emake -C examples/loadables CFLAGS="${CFLAGS} ${pgo_generate_flags[*]}" all others
+
+       # Build Bash and run its tests to generate profiles.
+       if (( ${#pgo_generate_flags[@]} )); then
+               # Used in test suite.
+               unset -v A
+
+               emake CFLAGS="${CFLAGS} ${pgo_generate_flags[*]}" -k check
+
+               if tc-is-clang; then
+                       llvm-profdata merge "${T}"/pgo --output="${T}"/pgo/default.profdata || die
+               fi
+
+               # Rebuild Bash using the profiling data we just generated.
+               emake clean
+               emake CFLAGS="${CFLAGS} ${pgo_use_flags[*]}"
+               use plugins && emake -C examples/loadables CFLAGS="${CFLAGS} ${pgo_use_flags[*]}" all others
+       fi
+}
+
+# Run the default test phase, having first unset the A variable.
+src_test() {
+       # Used in test suite. Unset it so that a value inherited from the
+       # environment cannot interfere.
+       unset -v A
+
+       default
+}
+
+# Install bash to /bin along with the rbash symlink, the configuration files
+# under /etc/bash and /etc/skel, and - depending on the USE flags - the
+# loadable builtins with their headers and the examples. The supplemental
+# man pages and ChangeLog are installed last.
+src_install() {
+       local d f
+
+       default
+
+       # Print the file named by $2 to stdout. For each line matching the
+       # pattern *$1*, the first instance of "/etc/" is prefixed by EPREFIX.
+       # Should printf fail, the loop is broken with a non-zero status so
+       # that die is triggered.
+       my_prefixify() {
+               while read -r; do
+                       if [[ $REPLY == *$1* ]]; then
+                               REPLY=${REPLY/"/etc/"/"${EPREFIX}/etc/"}
+                       fi
+                       printf '%s\n' "${REPLY}" || ! break
+               done < "$2" || die
+       }
+
+       dodir /bin
+       mv -- "${ED}"/usr/bin/bash "${ED}"/bin/ || die
+       dosym bash /bin/rbash
+
+       insinto /etc/bash
+       doins "${FILESDIR}"/bash_logout
+       my_prefixify bashrc.d "${FILESDIR}"/bashrc-r1 | newins - bashrc
+
+       # The drop-ins are installed without their revision suffixes.
+       insinto /etc/bash/bashrc.d
+       my_prefixify DIR_COLORS "${FILESDIR}"/bashrc.d/10-gentoo-color-r1.bash | newins - 10-gentoo-color.bash
+       newins "${FILESDIR}"/bashrc.d/10-gentoo-title-r2.bash 10-gentoo-title.bash
+       if [[ ! ${EPREFIX} ]]; then
+               doins "${FILESDIR}"/bashrc.d/15-gentoo-bashrc-check.bash
+       fi
+
+       insinto /etc/skel
+       for f in bash{_logout,_profile,rc}; do
+               newins "${FILESDIR}/dot-${f}" ".${f}"
+       done
+
+       if use plugins; then
+               exeinto "/usr/$(get_libdir)/bash"
+               # Each loadable shares the name of its object file, less the
+               # .o suffix.
+               set -- examples/loadables/*.o
+               doexe "${@%.o}"
+
+               insinto /usr/include/bash-plugins
+               doins *.h builtins/*.h include/*.h lib/{glob/glob.h,tilde/tilde.h}
+       fi
+
+       if use examples; then
+               for d in examples/{functions,misc,scripts,startup-files}; do
+                       exeinto "/usr/share/doc/${PF}/${d}"
+                       docinto "${d}"
+                       for f in "${d}"/*; do
+                               # PERMISSION and README files are installed as
+                               # documentation, all others as executables.
+                               if [[ ${f##*/} != @(PERMISSION|*README) ]]; then
+                                       doexe "${f}"
+                               else
+                                       dodoc "${f}"
+                               fi
+                       done
+               done
+       fi
+
+       # Install bash_builtins.1 and rbash.1.
+       emake -C doc DESTDIR="${D}" install_builtins
+       sed 's:bash\.1:man1/&:' doc/rbash.1 > "${T}"/rbash.1 || die
+       doman "${T}"/rbash.1
+
+       newdoc CWRU/changelog ChangeLog
+       dosym bash.info /usr/share/info/bashref.info
+}
+
+pkg_preinst() {
+       if [[ -e ${EROOT}/etc/bashrc ]] && [[ ! -d ${EROOT}/etc/bash ]]; then
+               mkdir -p -- "${EROOT}"/etc/bash \
+               && mv -f -- "${EROOT}"/etc/bashrc "${EROOT}"/etc/bash/ \
+               || die
+       fi
+}
+
+# Provide /bin/sh if it is missing then, unless upgrading from 5.2_p26-r8 or
+# later, warn the user as to the changes in how Gentoo configures bash.
+pkg_postinst() {
+       local old_ver
+
+       # If /bin/sh does not exist, provide it.
+       if [[ ! -e ${EROOT}/bin/sh ]]; then
+               ln -sf -- bash "${EROOT}"/bin/sh || die
+       fi
+
+       # The warning is shown for new installations (empty old_ver) and for
+       # upgrades from versions older than 5.2_p26-r8. A separate test against
+       # 5.2 is not required because it is implied by the test below.
+       # NOTE(review): read assigns the entire line to old_ver, so this
+       # presumes that REPLACING_VERSIONS holds one version at most - confirm.
+       read -r old_ver <<<"${REPLACING_VERSIONS}"
+       if [[ ${old_ver} ]] && ver_test "${old_ver}" -ge "5.2_p26-r8"; then
+               return
+       fi
+
+       while read -r; do ewarn "${REPLY}"; done <<'EOF'
+Files under /etc/bash/bashrc.d must now have a suffix of .sh or .bash.
+
+Gentoo now defaults to defining PROMPT_COMMAND as an array. Depending on the
+characteristics of the operating environment, it may contain a command to set
+the terminal's window title. Those who were already choosing to customise the
+PROMPT_COMMAND variable are now advised to append their commands like so:
+
+PROMPT_COMMAND+=('custom command goes here')
+
+Gentoo no longer defaults to having bash set the window title in the case
+that the terminal is controlled by sshd(8), unless screen is launched on the
+remote side or the terminal reliably supports saving and restoring the title
+(as alacritty, foot and tmux do). Those wanting for the title to be set
+regardless may adjust ~/.bashrc - or create a custom /etc/bash/bashrc.d
+drop-in - to set PROMPT_COMMAND like so:
+
+PROMPT_COMMAND=(genfun_set_win_title)
+
+Those who would prefer for bash never to interfere with the window title may
+now opt out of the default title setting behaviour, either with the "unset -v
+PROMPT_COMMAND" command or by re-defining PROMPT_COMMAND as desired.
+EOF
+}

diff --git a/app-shells/bash/files/bash-5.2_p32-erroneous-delimiter-pushback-condition.patch b/app-shells/bash/files/bash-5.2_p32-erroneous-delimiter-pushback-condition.patch
new file mode 100644
index 000000000000..6e8000952644
--- /dev/null
+++ b/app-shells/bash/files/bash-5.2_p32-erroneous-delimiter-pushback-condition.patch
@@ -0,0 +1,76 @@
+From 57fbf1c67f2d89365601f39e72781fba001fe2f3 Mon Sep 17 00:00:00 2001
+From: Kerin Millar <[email protected]>
+Date: Mon, 28 Apr 2025 08:13:38 +0100
+Subject: [PATCH 3/3] Backport fix for erroneous delimiter pushback condition
+ in read_mbchar
+
+This is a partial backport of commit 7731dc5c4d405ab147fc562e3af2a375ca593554
+from the devel branch. Consider the following test case.
+
+$ LC_ALL=en_US.UTF-8
+$ printf 'FOO\0\315\0\226\0' | while read -rd ''; do echo "${REPLY@Q}"; done
+
+With any vanilla 5.0, 5.1 or 5.2 release, the third record is disregarded.
+
+<FOO>
+<$'\315'>
+
+With 5.3-rc1, the third record is treated as if it were two empty
+records. The same is true of Gentoo's 5.2_p37 release.
+
+'FOO'
+$'\315'
+''
+''
+
+With the upcoming 5.3-rc2, which will incorporate this patch, all three
+records are read correctly.
+
+<FOO>
+<$'\315'>
+<$'\226'>
+
+The issue is addressed by ensuring that the revised read_mbchar()
+routine refrains from pushing back the delimiter - while effectively
+truncating the mbchar buffer by writing a NUL byte - in cases where the
+delimiter character was not read by the same routine.
+
+As of the time of writing, the issue has not been addressed by any of
+the official patchlevels, nor has 5.3 been released.
+
+Link: https://pubs.opengroup.org/onlinepubs/9799919799.2024edition/utilities/read.html#tag_20_100_06
+Link: https://mywiki.wooledge.org/BashPitfalls#IFS.3D_read_-r_-d_.27.27_filename
+Link: https://lists.gnu.org/archive/html/bug-bash/2025-04/msg00065.html
+Signed-off-by: Kerin Millar <[email protected]>
+---
+ builtins/read.def | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git builtins/read.def builtins/read.def
+index 9fd9a74c..8000def3 100644
+--- builtins/read.def
++++ builtins/read.def
+@@ -1102,14 +1102,14 @@ read_mbchar (fd, string, ind, ch, delim, unbuffered)
+       }
+       else if (ret == (size_t)-1)
+       {
+-        /* If we read a delimiter character that makes this an invalid
+-           multibyte character, we can't just add it to the input string
+-           and treat it as a byte. We need to push it back so a subsequent
+-           zread will pick it up. */
+-        if ((unsigned char)c == delim)
++        /* If we read (i > 1) a delimiter character (c == delimiter)
++           that makes this an invalid multibyte character, we can't just
++           add it to the input string and treat it as a byte.
++           We need to push it back so a subsequent zread will pick it up. */
++        if (i > 1 && (unsigned char)c == delim)
+           {
+             zungetc ((unsigned char)c);
+-            mbchar[--i] = '\0';               /* unget the delimiter */
++            i--;
+           }
+         break;                /* invalid multibyte character */
+       }
+-- 
+2.49.0
+

diff --git a/app-shells/bash/files/bash-5.2_p32-invalid-continuation-byte-ignored-as-delimiter-1.patch b/app-shells/bash/files/bash-5.2_p32-invalid-continuation-byte-ignored-as-delimiter-1.patch
new file mode 100644
index 000000000000..949f5b2f7abc
--- /dev/null
+++ b/app-shells/bash/files/bash-5.2_p32-invalid-continuation-byte-ignored-as-delimiter-1.patch
@@ -0,0 +1,286 @@
+From 2482e34ac12910f7e7436a968caf97ab96f8b582 Mon Sep 17 00:00:00 2001
+From: Kerin Millar <[email protected]>
+Date: Sat, 26 Apr 2025 08:43:58 +0100
+Subject: [PATCH 1/3] Backport fix for invalid continuation bytes being ignored
+ as delimiters
+
+This is a partial backport of commit 772e7e760e8a098e4d8dee21cf11090be4757918
+from the devel branch. It addresses an issue in read_mbchar() whereby
+the read builtin can read past the delimiter character, provided that it is
+invoked with a multibyte character set in effect. Consider the following
+test case.
+
+$ LC_ALL=en_US.UTF-8
+$ for i in {194..245}; do printf -v o %o "$i"; printf "\\$o\\n"; done |
+  while read -r; do declare -p REPLY; done
+declare -- REPLY=$'\302\n\303\n\304\n\305\n\306\n\307\n\310\n\311\n\312\
+n\313\n\314\n\315\n\316\n\317\n\320\n\321\n\322\n\323\n\324\n\325\n\326\
+n\327\n\330\n\331\n\332\n\333\n\334\n\335\n\336\n\337\n\340\n\341\n\342\
+n\343\n\344\n\345\n\346\n\347\n\350\n\351\n\352\n\353\n\354\n\355\n\356\
+n\357\n\360\n\361\n\362\n\363\n\364\n\365'
+
+The producing loop emits a sequence of bytes in the range 0xC2 - 0xF5.
+Since each is terminated by a <newline> character, one would expect for
+exactly 52 iterations of the consuming loop, with REPLY being assigned a
+single byte each time. Instead, the input is read in its entirety. Why
+is that, one may ask.
+
+Given a legal UTF-8 byte sequence, any bytes whose values are between
+0xC2 - 0xF4 are combinative in nature; they can only be followed by
+between one and three bytes that are outside of that range.
+
+0xC2 - 0xDF : First byte of a 2-byte code unit sequence
+0xE0 - 0xEF : First byte of a 3-byte code unit sequence
+0xF0 - 0xF4 : First byte of a 4-byte code unit sequence
+
+As such, bash begins by reading the 0xC2 byte, for which mbrtowc(3)
+returns -2, indicating an incomplete multibyte sequence. Next, the 0x0A
+byte is read, for which mbrtowc(3) returns -1, indicating an invalid
+multibyte sequence. At this point, bash ought to recognise the most
+recently read byte as a delimiter. Instead, it continues reading the
+input stream up until the delimiter that follows 0xF5, which is neither
+a combining character nor legal in UTF-8 in any capacity.
+
+This patch addresses the issue by introducing the zungetc() function,
+which is used by read_mbchar() to push back the delimiter character that
+transforms the sequence from an incomplete one to an invalid one. Said
+character is then detected by the next invocation of the zread()
+function, allowing for the decision to be made to return.
+
+With this, the output of the test case amounts to 52 lines, as expected.
+
+declare -- REPLY=$'\302'
+declare -- REPLY=$'\303'
+...
+declare -- REPLY=$'\364'
+declare -- REPLY=$'\365'
+
+The issue affects all bash releases from 5.0 to 5.3-alpha. As of the
+time of writing, it has not been addressed by any of the official
+patchlevels, nor has 5.3 been released.
+
+Link: https://pubs.opengroup.org/onlinepubs/9799919799.2024edition/utilities/read.html#tag_20_100_06
+Link: https://mywiki.wooledge.org/BashPitfalls#IFS.3D_read_-r_-d_.27.27_filename
+Link: https://lists.gnu.org/archive/html/bug-bash/2025-04/msg00068.html
+Signed-off-by: Kerin Millar <[email protected]>
+---
+ builtins/read.def | 25 ++++++++++++----
+ externs.h         |  1 +
+ lib/sh/zread.c    | 74 +++++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 94 insertions(+), 6 deletions(-)
+
+diff --git builtins/read.def builtins/read.def
+index ddd91d32..53b4bd81 100644
+--- builtins/read.def
++++ builtins/read.def
+@@ -130,7 +130,7 @@ static void set_readline_timeout PARAMS((sh_timer *t, 
time_t, long));
+ #endif
+ static SHELL_VAR *bind_read_variable PARAMS((char *, char *, int));
+ #if defined (HANDLE_MULTIBYTE)
+-static int read_mbchar PARAMS((int, char *, int, int, int));
++static int read_mbchar PARAMS((int, char *, int, int, int, int));
+ #endif
+ static void ttyrestore PARAMS((struct ttsave *));
+ 
+@@ -806,7 +806,7 @@ add_char:
+         else
+ #  endif
+         if (locale_utf8locale == 0 || ((c & 0x80) != 0))
+-          i += read_mbchar (fd, input_string, i, c, unbuffered_read);
++          i += read_mbchar (fd, input_string, i, c, delim, unbuffered_read);
+       }
+ #endif
+ 
+@@ -1064,10 +1064,10 @@ bind_read_variable (name, value, flags)
+ 
+ #if defined (HANDLE_MULTIBYTE)
+ static int
+-read_mbchar (fd, string, ind, ch, unbuffered)
++read_mbchar (fd, string, ind, ch, delim, unbuffered)
+      int fd;
+      char *string;
+-     int ind, ch, unbuffered;
++     int ind, ch, delim, unbuffered;
+ {
+   char mbchar[MB_LEN_MAX + 1];
+   int i, n, r;
+@@ -1101,8 +1101,21 @@ read_mbchar (fd, string, ind, ch, unbuffered)
+         mbchar[i++] = c;      
+         continue;
+       }
+-      else if (ret == (size_t)-1 || ret == (size_t)0 || ret > (size_t)0)
+-      break;
++      else if (ret == (size_t)-1)
++      {
++        /* If we read a delimiter character that makes this an invalid
++           multibyte character, we can't just add it to the input string
++           and treat it as a byte. We need to push it back so a subsequent
++           zread will pick it up. */
++        if (c == delim)
++          {
++            zungetc (c);
++            mbchar[--i] = '\0';               /* unget the delimiter */
++          }
++        break;                /* invalid multibyte character */
++      }
++      else if (ret == (size_t)0 || ret > (size_t)0)
++      break;          /* valid multibyte character */
+     }
+ 
+ mbchar_return:
+diff --git externs.h externs.h
+index 931dba9c..1b70a13b 100644
+--- externs.h
++++ externs.h
+@@ -536,6 +536,7 @@ extern ssize_t zreadintr PARAMS((int, char *, size_t));
+ extern ssize_t zreadc PARAMS((int, char *));
+ extern ssize_t zreadcintr PARAMS((int, char *));
+ extern ssize_t zreadn PARAMS((int, char *, size_t));
++extern int zungetc PARAMS((int));
+ extern void zreset PARAMS((void));
+ extern void zsyncfd PARAMS((int));
+ 
+diff --git lib/sh/zread.c lib/sh/zread.c
+index dafb7f60..7cfbb288 100644
+--- lib/sh/zread.c
++++ lib/sh/zread.c
+@@ -41,6 +41,10 @@ extern int errno;
+ #  define ZBUFSIZ 4096
+ #endif
+ 
++#ifndef EOF
++#  define EOF -1
++#endif
++
+ extern int executing_builtin;
+ 
+ extern void check_signals_and_traps (void);
+@@ -48,6 +52,11 @@ extern void check_signals (void);
+ extern int signal_is_trapped (int);
+ extern int read_builtin_timeout (int);
+ 
++int zungetc (int);
++
++/* Provide one character of pushback whether we are using read or zread. */
++static int zpushedchar = -1;
++
+ /* Read LEN bytes from FD into BUF.  Retry the read on EINTR.  Any other
+    error causes the loop to break. */
+ ssize_t
+@@ -59,6 +68,15 @@ zread (fd, buf, len)
+   ssize_t r;
+ 
+   check_signals ();   /* check for signals before a blocking read */
++
++  /* If we pushed a char back, return it immediately */
++  if (zpushedchar != -1)
++    {
++      *buf = (unsigned char)zpushedchar;
++      zpushedchar = -1;
++      return 1;
++    }
++
+   /* should generalize into a mechanism where different parts of the shell can
+      `register' timeouts and have them checked here. */
+   while (((r = read_builtin_timeout (fd)) < 0 || (r = read (fd, buf, len)) < 
0) &&
+@@ -95,6 +113,14 @@ zreadretry (fd, buf, len)
+   ssize_t r;
+   int nintr;
+ 
++  /* If we pushed a char back, return it immediately */
++  if (zpushedchar != -1)
++    {
++      *buf = (unsigned char)zpushedchar;
++      zpushedchar = -1;
++      return 1;
++    }
++
+   for (nintr = 0; ; )
+     {
+       r = read (fd, buf, len);
+@@ -118,6 +144,15 @@ zreadintr (fd, buf, len)
+      size_t len;
+ {
+   check_signals ();
++
++  /* If we pushed a char back, return it immediately */
++  if (zpushedchar != -1)
++    {
++      *buf = (unsigned char)zpushedchar;
++      zpushedchar = -1;
++      return 1;
++    }
++
+   return (read (fd, buf, len));
+ }
+ 
+@@ -135,6 +170,14 @@ zreadc (fd, cp)
+ {
+   ssize_t nr;
+ 
++  /* If we pushed a char back, return it immediately */
++  if (zpushedchar != -1 && cp)
++    {
++      *cp = (unsigned char)zpushedchar;
++      zpushedchar = -1;
++      return 1;
++    }
++
+   if (lind == lused || lused == 0)
+     {
+       nr = zread (fd, lbuf, sizeof (lbuf));
+@@ -160,6 +203,14 @@ zreadcintr (fd, cp)
+ {
+   ssize_t nr;
+ 
++  /* If we pushed a char back, return it immediately */
++  if (zpushedchar != -1 && cp)
++    {
++      *cp = (unsigned char)zpushedchar;
++      zpushedchar = -1;
++      return 1;
++    }
++
+   if (lind == lused || lused == 0)
+     {
+       nr = zreadintr (fd, lbuf, sizeof (lbuf));
+@@ -186,6 +237,13 @@ zreadn (fd, cp, len)
+ {
+   ssize_t nr;
+ 
++  if (zpushedchar != -1 && cp)
++    {
++      *cp = zpushedchar;
++      zpushedchar = -1;
++      return 1;
++    }
++
+   if (lind == lused || lused == 0)
+     {
+       if (len > sizeof (lbuf))
+@@ -204,6 +262,22 @@ zreadn (fd, cp, len)
+   return 1;
+ }
+ 
++int
++zungetc (c)
++     int c;
++{
++  if (zpushedchar == -1)
++    {
++      zpushedchar = c;
++      return c;
++    }
++
++  if (c == EOF || lind == 0)
++    return (EOF);
++  lbuf[--lind] = c;           /* XXX */
++  return c;
++}
++
+ void
+ zreset ()
+ {
+-- 
+2.49.0
+

diff --git a/app-shells/bash/files/bash-5.2_p32-invalid-continuation-byte-ignored-as-delimiter-2.patch b/app-shells/bash/files/bash-5.2_p32-invalid-continuation-byte-ignored-as-delimiter-2.patch
new file mode 100644
index 000000000000..ff6fdf25414b
--- /dev/null
+++ b/app-shells/bash/files/bash-5.2_p32-invalid-continuation-byte-ignored-as-delimiter-2.patch
@@ -0,0 +1,146 @@
+From fb31e1965baa732f9e4cdbe3a7d6691d7beb4e03 Mon Sep 17 00:00:00 2001
+From: Kerin Millar <kfm <AT> plushkava.net>
+Date: Mon, 28 Apr 2025 07:59:25 +0100
+Subject: [PATCH 2/3] Backport fix for invalid continuation bytes above 0x7F
+ being ignored as delimiters
+
+This is a partial backport of commit e327891b52513bef0b34aac625c44f8fa6811f53
+from the devel branch. It addresses an issue in read_mbchar() whereby an
+invalid continuation byte greater than 0x7F isn't recognised as a valid
+delimiter on platforms where char is signed. Consider the following test
+case.
+
+$ LC_ALL=en_US.UTF-8; uname -m
+x86_64
+$ printf '\317\360_' | { read -rd $'\360'; echo "${REPLY@Q}"; }
+$'\317\360_'
+
+After applying this patch, the value of REPLY will be $'\317'.
+
+The issue affects all bash releases from 5.0 to 5.3-rc1. As of the time
+of writing, it has not been addressed by any of the official
+patchlevels, nor has 5.3 been released.
+
+Link: https://pubs.opengroup.org/onlinepubs/9799919799.2024edition/utilities/read.html#tag_20_100_06
+Link: https://mywiki.wooledge.org/BashPitfalls#IFS.3D_read_-r_-d_.27.27_filename
+Link: https://lists.gnu.org/r/bug-bash/2024-08/msg00100.html
+Signed-off-by: Kerin Millar <kfm <AT> plushkava.net>
+---
+ builtins/read.def | 40 +++++++++++++++++++++++-----------------
+ 1 file changed, 23 insertions(+), 17 deletions(-)
+
+diff --git builtins/read.def builtins/read.def
+index 53b4bd81..9fd9a74c 100644
+--- builtins/read.def
++++ builtins/read.def
+@@ -142,7 +142,7 @@ sh_timer *read_timeout;
+ 
+ static int reading, tty_modified;
+ static SigHandler *old_alrm;
+-static unsigned char delim;
++static int delim;
+ 
+ static struct ttsave termsave;
+ 
+@@ -320,7 +320,6 @@ read_builtin (list)
+         break;
+       case 'N':
+         ignore_delim = 1;
+-        delim = -1;
+       case 'n':
+         nflag = 1;
+         code = legal_number (list_optarg, &intval);
+@@ -348,7 +347,7 @@ read_builtin (list)
+           }
+         break;
+       case 'd':
+-        delim = *list_optarg;
++        delim = (unsigned char)*list_optarg;
+         break;
+       CASE_HELPOPT;
+       default:
+@@ -765,7 +764,7 @@ read_builtin (list)
+         continue;
+       }
+ 
+-      if (ignore_delim == 0 && (unsigned char)c == delim)
++      if ((unsigned char)c == delim)
+       break;
+ 
+       if (c == '\0' && delim != '\0')
+@@ -1107,9 +1106,9 @@ read_mbchar (fd, string, ind, ch, delim, unbuffered)
+            multibyte character, we can't just add it to the input string
+            and treat it as a byte. We need to push it back so a subsequent
+            zread will pick it up. */
+-        if (c == delim)
++        if ((unsigned char)c == delim)
+           {
+-            zungetc (c);
++            zungetc ((unsigned char)c);
+             mbchar[--i] = '\0';               /* unget the delimiter */
+           }
+         break;                /* invalid multibyte character */
+@@ -1220,6 +1219,7 @@ edit_line (p, itext)
+   len = strlen (ret);
+   ret = (char *)xrealloc (ret, len + 2);
+-  ret[len++] = delim;
++  if (delim > 0)
++    ret[len++] = delim;
+   ret[len] = '\0';
+   return ret;
+ }
+@@ -1240,7 +1241,7 @@ static rl_command_func_t *old_delim_func;
+ static int old_newline_ctype;
+ static rl_command_func_t *old_newline_func;
+ 
+-static unsigned char delim_char;
++static int delim_char;
+ 
+ static void
+ set_eol_delim (c)
+@@ -1252,19 +1253,21 @@ set_eol_delim (c)
+     initialize_readline ();
+   cmap = rl_get_keymap ();
+ 
+-  /* Save the old delimiter char binding */
++  /* Save the old newline binding and change it to self-insert */
+   old_newline_ctype = cmap[RETURN].type;
+   old_newline_func =  cmap[RETURN].function;
+-  old_delim_ctype = cmap[c].type;
+-  old_delim_func = cmap[c].function;
+-
+-  /* Change newline to self-insert */
+   cmap[RETURN].type = ISFUNC;
+   cmap[RETURN].function = rl_insert;
+ 
+-  /* Bind the delimiter character to accept-line. */
+-  cmap[c].type = ISFUNC;
+-  cmap[c].function = rl_newline;
++  /* Save any binding to the delimiter and bind the delimiter to accept-line */
++  if (c >= 0)
++    {
++      old_delim_ctype = cmap[c].type;
++      old_delim_func = cmap[c].function;
++
++      cmap[c].type = ISFUNC;
++      cmap[c].function = rl_newline;
++    }
+ 
+   delim_char = c;
+ }
+@@ -1280,7 +1283,10 @@ reset_eol_delim (cp)
+   cmap[RETURN].type = old_newline_ctype;
+   cmap[RETURN].function = old_newline_func;
+ 
+-  cmap[delim_char].type = old_delim_ctype;
+-  cmap[delim_char].function = old_delim_func;
++  if (delim_char >= 0)
++    {
++      cmap[delim_char].type = old_delim_ctype;
++      cmap[delim_char].function = old_delim_func;
++    }
+ }
+ #endif
+-- 
+2.49.0
+

diff --git a/app-shells/bash/files/bashrc.d/10-gentoo-color-r1.bash b/app-shells/bash/files/bashrc.d/10-gentoo-color-r1.bash
new file mode 100644
index 000000000000..72693cb33aa5
--- /dev/null
+++ b/app-shells/bash/files/bashrc.d/10-gentoo-color-r1.bash
@@ -0,0 +1,73 @@
+# /etc/bash/bashrc.d/10-gentoo-color.bash
+
+if [[ ${NO_COLOR} ]]; then
+       # Respect the user's wish not to use color. See https://no-color.org/.
+       gentoo_color=0
+elif [[ ${COLORTERM@a} == *x* && ${COLORTERM} == @(24bit|truecolor) ]]; then
+       # The COLORTERM environment variable can reasonably be trusted here.
+       # See https://github.com/termstandard/colors for further information.
+       gentoo_color=1
+elif unset -v COLORTERM; ! gentoo_color=$(tput colors 2>/dev/null); then
+       # Either ncurses is not installed or no terminfo database could be
+       # found. Fall back to a whitelist which covers the majority of terminal
+       # emulators and virtual console implementations known to support color
+       # and which remain (somewhat) popular. This will rarely happen, so the
+       # list need not be exhaustive.
+       case ${TERM} in
+               *color*    |\
+               *direct*   |\
+               *ghostty   |\
+               [Ekx]term* |\
+               alacritty  |\
+               aterm      |\
+               contour    |\
+               dtterm     |\
+               foot*      |\
+               jfbterm    |\
+               linux      |\
+               mlterm     |\
+               rxvt*      |\
+               screen*    |\
+               tmux*      |\
+               wsvt25*    ) gentoo_color=1
+       esac
+elif (( gentoo_color == 16777216 )); then
+       # Truecolor support is available. Advertise it.
+       export COLORTERM=truecolor
+fi
+
+# For direxpand to be missing indicates that bash is lacking readline support.
+if (( gentoo_color <= 0 )) || [[ ! $(shopt -p direxpand 2>/dev/null) ]]; then
+       # Define a prompt without color.
+       PS1='\u@\h \w \$ '
+elif (( EUID == 0 )); then
+       # If root, omit the username and print the hostname in red.
+       PS1='\[\e[01;31m\]\h\[\e[01;34m\] \w \$\[\e[00m\] '
+else
+       # Otherwise, print the username and hostname in green.
+       PS1='\[\e[01;32m\]\u@\h\[\e[01;34m\] \w \$\[\e[00m\] '
+fi
+
+if (( gentoo_color > 0 )); then
+       # Colorize the output of diff(1), grep(1) and a few coreutils utilities.
+       # However, do so only where no alias/function by the given name exists.
+       for _ in diff dir grep ls vdir; do
+               if [[ $(type -t "$_") == file ]]; then
+                       alias "$_=$_ --color=auto"
+               fi
+       done
+
+       # Enable colors for ls(1) and some other utilities that respect the
+       # LS_COLORS variable. Prefer ~/.dir_colors, per bug #64489.
+       if hash dircolors 2>/dev/null; then
+               if [[ -f ~/.dir_colors ]]; then
+                       eval "$(COLORTERM=1 dircolors -b -- ~/.dir_colors)"
+               elif [[ -f /etc/DIR_COLORS ]]; then
+                       eval "$(COLORTERM=1 dircolors -b /etc/DIR_COLORS)"
+               else
+                       eval "$(COLORTERM=1 dircolors -b)"
+               fi
+       fi
+fi
+
+unset -v gentoo_color

diff --git a/app-shells/bash/files/bashrc.d/10-gentoo-title-r2.bash b/app-shells/bash/files/bashrc.d/10-gentoo-title-r2.bash
new file mode 100644
index 000000000000..c1c560defb7c
--- /dev/null
+++ b/app-shells/bash/files/bashrc.d/10-gentoo-title-r2.bash
@@ -0,0 +1,83 @@
+# /etc/bash/bashrc.d/10-gentoo-title.bash
+
+# For information regarding the control sequences used, please refer to
+# https://invisible-island.net/xterm/ctlseqs/ctlseqs.html.
+
+genfun_set_win_title() {
+       # Advertise the fact that the presently running interactive shell will
+       # update the title. Doing so allows for its subprocesses to determine
+       # whether it is safe to set the title of their own accord. Note that 0
+       # refers to the value of Ps within the OSC Ps ; Pt BEL sequence.
+       export SHELL_SETS_TITLE=0
+
+       # Assigns the basename of the current working directory, having
+       # sanitised it with @Q parameter expansion. Useful for paths containing
+       # newlines and such. As a special case, names consisting entirely of
+       # graphemes shall not undergo the expansion, for reasons of cleanliness.
+       genfun_sanitise_cwd() {
+               _cwd=${PWD##*/}
+               if [[ ! ${_cwd} ]]; then
+                       _cwd=${PWD}
+               elif [[ ${_cwd} == *[![:graph:]]* ]]; then
+                       _cwd=${_cwd@Q}
+               fi
+       }
+
+       # Sets the window title with the Set Text Parameters control sequence.
+       # For screen, the sequence defines the hardstatus (%h) and for tmux, the
+       # pane_title (#T). For graphical terminal emulators, it is normal for
+       # the title bar to be affected.
+       genfun_set_win_title() {
+               local _cwd
+
+               genfun_sanitise_cwd
+               printf '\033]0;%s@%s - %s\007' "${USER}" "${HOSTNAME%%.*}" "${_cwd}"
+       }
+
+       genfun_set_win_title
+}
+
+unset -v SHELL_SETS_TITLE
+
+# Determine whether the terminal can handle the Set Text Parameters sequence.
+# The only terminals permitted here are those for which there is empirical
+# evidence that the sequence is supported and that the UTF-8 character encoding
+# is handled correctly. Quite rightly, this precludes many vintage terminals.
+case ${TERM} in
+       alacritty*|contour|foot*|tmux*)
+               # The terminal emulator also supports XTWINOPS. If the PTY was
+               # created by sshd(8) then push the current window title to the
+               # stack and arrange for it to be popped upon exiting. Xterm also
+               # supports this but there are far too many terminal emulators
+               # that falsely identify as being xterm-compatible.
+               if [[ ${SSH_TTY} && ${SSH_TTY} == "$(tty)" ]]; then
+                       trap 'printf "\033[23;0t"' EXIT
+                       printf '\033[22;0t'
+               fi
+               ;;
+       rxvt-unicode*|st-256color|xterm*)
+               # If the PTY was created by sshd(8) then proceed no further.
+               # Alas, there exist many operating environments in which the
+               # title would otherwise not be restored upon ssh(1) exiting.
+               # Those wanting for the title to be set regardless may adjust
+               # ~/.bashrc or create a bashrc.d drop-in to set PROMPT_COMMAND.
+               # For example, PROMPT_COMMAND=(genfun_set_win_title).
+               if [[ ${SSH_TTY} && ${SSH_TTY} == "$(tty)" ]]; then
+                       return
+               fi
+               ;;
+       screen*)
+               # If the PTY was created by sshd(8) and screen(1) was launched
+               # prior to the SSH session beginning, as opposed to afterwards,
+               # proceed no further. It is another case in which there would be
+               # no guarantee of the title being restored upon ssh(1) exiting.
+               if [[ ! ${WINDOW} && ${SSH_TTY} && ${SSH_TTY} == "$(tty)" ]]; then
+                       return
+               fi
+               ;;
+       *)
+               return
+esac
+
+# Arrange for the title to be updated each time the primary prompt is displayed.
+PROMPT_COMMAND+=('genfun_set_win_title')


Reply via email to