commit: 3e89139fae34c9bd2e2b4c0490512f71d1d78546 Author: Kerin Millar <kfm <AT> plushkava <DOT> net> AuthorDate: Tue Sep 2 10:07:57 2025 +0000 Commit: Sam James <sam <AT> gentoo <DOT> org> CommitDate: Mon Sep 15 10:37:56 2025 +0000 URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=3e89139f
phase-helpers.sh: refactor unpack() and have tar execute its decompressors This commit subjects the unpack() function to an overall refactoring while incorporating a fix for a regression that I was aware of and which has since been independently identified and filed as bug #962874. The changes are described herewith. Declare all of the supported archive suffixes in a fashion that mirrors the language of PMS. To wit, all documented suffixes are initially declared as elements of the 'suffixes' array, case-sensitively. In turn, that means that the following suffixes are now considered as first class entities, as opposed to being supported as a consequence of the machinations of the __unpack_tar() function. - tar.bz - tar.bz2 - tar.gz - tar.lzma - tar.Z After having composed the 'suffixes' array, declare an associative array by the name of 'suffix_by', where each key of the latter is an element of the former. In the case that the effective EAPI is found not to be case-sensitive, exploit the 'l' attribute of the declare/typeset builtin, by which bash is able to induce a lowercase conversion upon assignment. The latter array is prepared before the main loop begins and makes it easy to determine whether a given suffix is recognised, whilst doing away with the code pertaining to the now defunct 'suffix_known' variable (which was not particularly digestible). Declare and localise the 'XZ_OPT' variable in such a way that it is rendered exportable. Do so before the main loop begins, duly guaranteeing that the __makeopts_jobs() function shall be called just once and obviating the need to incorporate the -T option of the xz(1) utility at a later juncture. Employ a regular expression to extract the suffix of each filename that is considered. The pattern matches what is commonly referred to as an extension whilst permitting for it to be preceded by "tar.". 
Once the suffix has been extracted, employ a trivial -v test so as to determine whether it exists as a key in the 'suffix_by' array. Jettison the __unpack_tar() function. Not only was it responsible for operating on files that are _not_ tarballs but it was also responsible for further scrutinising the nature of the suffix. Long had I considered it to be in poor taste. Instead, have each case pattern be accompanied by the appropriate command. To compensate for the aforementioned change, introduce the following case patterns. - tar.bz|tar.bz2|tbz|tbz2) - tar|tar.*|tgz) Further, jettison the following case patterns. - tar) - tbz|tbz2) - tgz) Jettison the __compose_bzip2_cmd() function. Instead, determine which command may be employed to handle bzip2-compressed streams before the main loop begins, unconditionally. Further, refrain from throwing an exception in the event that both the 'PORTAGE_BUNZIP2_COMMAND' and 'PORTAGE_BZIP2_COMMAND' variables are defined as blank values. Refrain entirely from employing shell pipelines to combine the tar(1) utility with any particular decompression utility. Instead, delegate the responsibility of executing the decompressor entirely to tar(1). The rationale for this is twofold. Firstly, GNU tar is already perfectly capable of recognising a considerable number of file suffixes in the case that the -f option is specified. No fewer than 16, in fact. Based on these suffixes, it is further capable of determining whether to execute any of 8 distinct decompression utilities: compress(1), bzip2(1), gzip(1), lzip(1), lzma(1), lzop(1), xz(1) and zstd(1). Ergo, there is no sense in maintaining equivalent bash code that amounts merely to busywork. Secondly, there have been several portage bugs concerning the use of shell pipelines over the years. 
Some pertain to the matter of knowing the exit status of the decompressor, while others pertain to the consumption of malformed streams that cause tar(1) to close its end of the pipe while the decompressor is still writing. By shifting the burden of responsibility to tar(1), this entire class of problem is obviated. It is worth noting that bzip2-compressed tarballs are handled as a special case. The reason for this is straightforward: there is the continued need to support the 'PORTAGE_BUNZIP2_COMMAND' and 'PORTAGE_BZIP2_COMMAND' portage variables. To accommodate these, the -I option is used to convey a suitable decompression command. For instance, to specify -I "lbzip2 -c" would imply to tar(1) that it must execute lbzip2(1) with both the -c and -d options in effect. Though the -I option-argument is not conveyed to a shell, it is parsed in a manner that somewhat resembles that of a shell. Therefore, the values of the aforementioned portage variables shall only be accepted if they consist entirely of one or more non-blank characters that are neither the single quote nor the double quote. 
Fixes: 502bd866c85a44852bf1b214c3504d00ca50add5 Closes: https://bugs.gentoo.org/962874 Link: https://www.gnu.org/software/tar/manual/tar.html#auto_002dcompress Signed-off-by: Kerin Millar <kfm <AT> plushkava.net> Signed-off-by: Sam James <sam <AT> gentoo.org> bin/phase-helpers.sh | 135 ++++++++++++++++++++++++++----------------------- bin/save-ebuild-env.sh | 2 - 2 files changed, 73 insertions(+), 64 deletions(-) diff --git a/bin/phase-helpers.sh b/bin/phase-helpers.sh index fa33090dec..02b1aea4ee 100644 --- a/bin/phase-helpers.sh +++ b/bin/phase-helpers.sh @@ -322,47 +322,65 @@ use_enable() { } unpack() { - local created_symlink suffix_known basename output srcdir suffix f - - local -a bzip2_cmd + local created_symlink bzip2_cmd basename output srcdir suffix name f + local -A suffix_by + local -a suffixes + local -x XZ_OPT if (( $# == 0 )); then die "unpack: too few arguments (got 0; expected at least 1)" fi - __unpack_tar() { - local inner_suffix - - inner_suffix=${basename%.*} inner_suffix=${inner_suffix##*.} - if ! ___eapi_unpack_is_case_sensitive; then - inner_suffix=${inner_suffix,,} - fi - if [[ ${inner_suffix} == tar ]]; then - "$@" -c -- "${srcdir}${f}" | tar xof - - else - "$@" -c -- "${srcdir}${f}" > "${basename%.*}" - fi - } - - __compose_bzip2_cmd() { - local IFS + # Define an array of supported suffixes, case-sensitively. + # https://projects.gentoo.org/pms/8/pms.html#x1-13500012.3.15 + suffixes=( + a + bz + bz2 + deb + gz + jar + lzma + tar + tar.bz + tar.bz2 + tar.gz + tar.lzma + tar.Z + tbz + tbz2 + tgz + Z + zip + ZIP + ) + ___eapi_unpack_supports_7z && suffixes+=( 7z 7Z ) + ___eapi_unpack_supports_lha && suffixes+=( lha LHa LHA lzh ) + ___eapi_unpack_supports_rar && suffixes+=( rar RAR ) + ___eapi_unpack_supports_txz && suffixes+=( tar.xz txz ) + ___eapi_unpack_supports_xz && suffixes+=( xz ) + + # Compose a finalised dictionary of supported suffixes. + if ! 
___eapi_unpack_is_case_sensitive; then + # Induce lowercase conversion upon all subsequent assignments. + typeset -l suffix + fi + for suffix in "${suffixes[@]}"; do + suffix_by[$suffix]= + done - read -rd '' -a bzip2_cmd <<<"${PORTAGE_BUNZIP2_COMMAND}" - if (( ! ${#bzip2_cmd[@]} )); then - read -rd '' -a bzip2_cmd <<<"${PORTAGE_BZIP2_COMMAND}" - if (( ${#bzip2_cmd[@]} )); then - bzip2_cmd+=( -d ) - else - die "unpack: both PORTAGE_BUNZIP2_COMMAND and PORTAGE_BZIP2_COMMAND specify null commands" - fi + # Honour the user's choice of bzip2 decompressor, if specified. + for name in PORTAGE_BUNZIP2_CMD PORTAGE_BZIP2_CMD; do + if [[ ${!name} == +([![:blank:]\"\']) ]]; then + bzip2_cmd=${!name} + break fi - } + done - shopt -o -s pipefail - - for f in "$@"; do - basename=${f##*/} - suffix=${basename##*.} + # Ensure that xz(1) operates in its multi-threaded mode. + XZ_OPT="-T$(___makeopts_jobs)" + for f; do # wrt PMS 12.3.15 Misc Commands if [[ ${f} != */* ]]; then # filename without path of any kind @@ -391,22 +409,16 @@ unpack() { die "unpack: ${f@Q} cannot be unpacked because it is an empty file" fi - case ${suffix,,} in - tar|tgz|tbz2|tbz|zip|jar|gz|z|bz2|bz|a|deb|lzma) ;; - 7z) ___eapi_unpack_supports_7z ;; - rar) ___eapi_unpack_supports_rar ;; - lha|lzh) ___eapi_unpack_supports_lha ;; - xz) ___eapi_unpack_supports_xz ;; - txz) ___eapi_unpack_supports_txz ;; - *) false ;; - esac \ - && suffix_known=1 - - ___eapi_unpack_is_case_sensitive \ - && [[ ${suffix} != @("${suffix,,}"|ZIP|Z|7Z|RAR|LH[Aa]) ]] \ - && suffix_known=0 - - if (( suffix_known )); then + + # Extract the suffix of the filename. + basename=${f##*/} + suffix= + if [[ ${basename} =~ \.([Tt][Aa][Rr]\.)?[^.]+$ ]]; then + suffix=${BASH_REMATCH[0]#.} + fi + + # Skip any files bearing unsupported suffixes. 
+ if [[ -v 'suffix_by[$suffix]' ]]; then __vecho ">>> Unpacking ${f@Q} to ${PWD}" else __vecho "=== Skipping unpack of ${f@Q}" @@ -424,8 +436,7 @@ unpack() { ar x "${srcdir}${f}" ;; bz|bz2) - (( ${#bzip2_cmd[@]} )) || __compose_bzip2_cmd - __unpack_tar "${bzip2_cmd[@]}" + "${bzip2_cmd-bzip2}" -dc -- "${srcdir}${f}" > "${basename%.*}" ;; deb) # Unpacking .deb archives can not always be done with @@ -451,7 +462,7 @@ unpack() { fi ;; gz|z) - __unpack_tar gzip -d + gzip -dc -- "${srcdir}${f}" > "${basename%.*}" ;; jar|zip) # unzip will interactively prompt under some error conditions, @@ -463,26 +474,26 @@ unpack() { lha xfq "${srcdir}${f}" ;; lzma) - __unpack_tar lzma -d + xz -F lzma -dc -- "${srcdir}${f}" > "${basename%.*}" ;; rar) unrar x -idq -o+ "${srcdir}${f}" ;; - tar) - tar xof "${srcdir}${f}" - ;; - tbz|tbz2) - (( ${#bzip2_cmd[@]} )) || __compose_bzip2_cmd - "${bzip2_cmd[@]}" -c -- "${srcdir}${f}" | tar xof - + tar.bz|tar.bz2|tbz|tbz2) + tar -I "${bzip2_cmd-bzip2} -c" -xof "${srcdir}${f}" ;; - tgz) - tar xozf "${srcdir}${f}" + tar|tar.*|tgz) + # GNU tar recognises various file suffixes, for + # which it is able to execute the appropriate + # decompressor. They are documented by the + # (info) manual for the -a option. 
+ tar --warning=decompress-program -xof "${srcdir}${f}" ;; txz) - XZ_OPT="-T$(___makeopts_jobs)" tar xof "${srcdir}${f}" + tar -xJof "${srcdir}${f}" ;; xz) - __unpack_tar xz -T"$(___makeopts_jobs)" -d + xz -dc -- "${srcdir}${f}" > "${basename%.*}" ;; esac || die "unpack: failure unpacking ${f@Q}" done diff --git a/bin/save-ebuild-env.sh b/bin/save-ebuild-env.sh index b520a94858..ace7e41f9c 100644 --- a/bin/save-ebuild-env.sh +++ b/bin/save-ebuild-env.sh @@ -184,7 +184,6 @@ __save_ebuild_env() ( __abort_prepare __abort_test __check_bash_version - __compose_bzip2_cmd __dump_trace __dyn_clean __dyn_compile @@ -223,7 +222,6 @@ __save_ebuild_env() ( __start_distcc __strip_duplicate_slashes __try_source - __unpack_tar __unset_colors __vecho __ver_compare
