Re: [arch-projects] [namcap] [PATCH] Add handling for compression into namcap.py
On Fri, 11 Dec 2020 at 20:29, Eli Schwartz via arch-projects wrote: > > On 12/11/20 2:25 PM, meganomic via arch-projects wrote: > > Adds handling of the compression and temporary storage into namcap.py > > so it can be removed from the bash script. > > https://bugs.archlinux.org/task/59844 mentions "use setuptools entry > > points." instead of the bash script but I don't know how to do that. > > I just removed it from the bash script for now. > > The subprocess to decompression tools is still (with your patch) just as > annoying to me as the existence of the bash script. Python's builtin > tarfile.open can handle gz, bz2, or xz. Fwiw the python build-in handling of some archives is an order of magnitude slower, than on a uncompressed file. From brief testing, extracting is almost always a (time) win. Ages ago I was debugging a pathological case where a sub 20MB (compressed) binary package was of taking well over 6 _minutes_ IIRC with namcap. Will polish and send up my patches over the next few days. They help significantly, in said said case and overall. Regards, Emil
Re: [arch-projects] [namcap] [PATCH] Add handling for compression into namcap.py
Fixed some potential bugs and made it all around better. I'm working on upstreaming support in xtarfile for LZ4 compression so hopefully the special handling for that can be removed soon. I also fixed a bug in xtarfile that prevents .tar files from being opened. So once the new version hits the repos 'import tarfile' can be removed. diff --git a/namcap b/namcap index ea0bc94..019b077 100755 --- a/namcap +++ b/namcap @@ -1,39 +1,2 @@ #!/bin/bash - -args='' -tmp=$(mktemp -d --tmpdir namcap.XX) -cleanup() { - rm -rf "${tmp}" -} -trap 'cleanup' 0 - -for arg in "${@}"; do - if echo "${arg}" | grep -q -E "^.+\.pkg\.tar\..+$" && [ -f "${arg}" ]; then - - extra_opts='' - case "${arg##*.}" in - gz|z|Z) cmd='gzip' ;; - bz2|bz) cmd='bzip2' ;; - xz) cmd='xz' ;; - lzo)cmd='lzop' ;; - lrz)cmd='lrzip' - extra_opts="-q -o -" ;; - lz4)cmd='lz4' - extra_opts="-q" ;; - lz) cmd='lzip' - extra_opts="-q" ;; - zst)cmd='zstd' - extra_opts="-q" ;; - *) echo 'Unsupported compression'; exit 1;; - esac - - tar="${tmp}/$(basename "${arg%.*}")" - $cmd -dcf $extra_opts "${arg}" > "${tar}" - - args="${args} ${tar}" - else - args="${args} ${arg}" - fi -done - -/usr/bin/env python3 -m namcap ${args} +/usr/bin/env python3 -m namcap ${@} diff --git a/namcap.py b/namcap.py index a7f532a..78662db 100755 --- a/namcap.py +++ b/namcap.py @@ -22,7 +22,11 @@ import getopt import os import sys +import subprocess +import tempfile +import pathlib import tarfile +import xtarfile import Namcap.depends import Namcap.tags @@ -49,18 +53,6 @@ def usage(): sys.exit(2) -def open_package(filename): - try: - tar = tarfile.open(filename, "r") - if '.PKGINFO' not in tar.getnames(): - tar.close() - return None - except IOError: - if tar: - tar.close() - return None - return tar - def check_rules_exclude(optlist): '''Check if the -r (--rules) and the -r (--exclude) options are being used at same time''' @@ -76,13 +68,10 @@ def show_messages(name, key, messages): for msg in messages: print("%s %s: %s" % (name, key, Namcap.tags.format_message(msg))) -def process_realpackage(package, modules): +def process_realpackage(pkgtar, package, modules): """Runs namcap checks over a package tarball""" - extracted = 0 - pkgtar = open_package(package) - - if not pkgtar: - print("Error: %s is empty or is not a valid package" % package) + if '.PKGINFO' not in pkgtar.getnames(): + raise TypeError('Error: %s is not a valid package' % package) return 1 pkginfo = Namcap.package.load_from_tarball(package) @@ -98,7 +87,7 @@ def process_realpackage(package, modules): rule.analyze(pkginfo, pkgtar) else: show_messages(pkginfo["name"], 'E', - [('error-running-rule %s', i)]) + [('error-running-rule %s', i)]) # Output the three types of messages show_messages(pkginfo["name"], 'E', rule.errors) @@ -237,15 +226,71 @@ if len(active_modules) == 0: # Go through each package, get the info, and apply the rules for package in packages: - if not os.access(package, os.R_OK): + pkgpath = pathlib.Path(package) + + if not pkgpath.is_file(): print("Error: Problem reading %s" % package) usage() - - if os.path.isfile(package) and tarfile.is_tarfile(package): - process_realpackage(package, active_modules) - elif 'PKGBUILD' in package: + sys.exit(1) + + if pkgpath.with_suffix('').suffix == '.tar' or pkgpath.suffix == '.tar': + try: # Try using xtarfile first + with xtarfile.open(package, "r") as pkgtar: + process_realpackage(pkgtar, package, active_modules) + + # If xtarfile can't handle the compression do it with external software + except NotImplementedError: + if pkgpath.suffix == '.lzo': + cmd = ['lzop', '-cdqf', pkgpath.as_posix()] + elif pkgpath.suffix == '.lrz': + cmd = ['lrzip', '-dqfo', '-', pkgpath.as_posix()] + elif
Re: [arch-projects] [namcap] [PATCH] Add handling for compression into namcap.py
On 12/11/20 5:10 PM, meganomic via arch-projects wrote: > I did a quick patch using your idea and it definitely seems like a > good way to do it. Can probably clean it up and make it look prettier > with some refactoring. I'll look into it later. Nice, thanks! Haven't taken more than a cursory look at the implementation, but one thing stands out for cleanup, see below. diff --git a/namcap.py b/namcap.py index a7f532a..0e78b89 100755 --- a/namcap.py +++ b/namcap.py @@ -22,7 +22,10 @@ import getopt import os import sys -import tarfile +import subprocess +import tempfile +import pathlib +import xtarfile import Namcap.depends import Namcap.tags @@ -49,18 +52,6 @@ def usage(): sys.exit(2) -def open_package(filename): - try: - tar = tarfile.open(filename, "r") - if '.PKGINFO' not in tar.getnames(): - tar.close() - return None - except IOError: - if tar: - tar.close() - return None - return tar - def check_rules_exclude(optlist): '''Check if the -r (--rules) and the -r (--exclude) options are being used at same time''' @@ -79,39 +70,38 @@ def show_messages(name, key, messages): def process_realpackage(package, modules): """Runs namcap checks over a package tarball""" extracted = 0 - pkgtar = open_package(package) - - if not pkgtar: - print("Error: %s is empty or is not a valid package" % package) - return 1 - - pkginfo = Namcap.package.load_from_tarball(package) - # Loop through each one, load them apply if possible - for i in modules: - rule = get_modules()[i]() - - if isinstance(rule, Namcap.ruleclass.PkgInfoRule): - rule.analyze(pkginfo, None) - elif isinstance(rule, Namcap.ruleclass.PkgbuildRule): - pass - elif isinstance(rule, Namcap.ruleclass.TarballRule): - rule.analyze(pkginfo, pkgtar) - else: - show_messages(pkginfo["name"], 'E', - [('error-running-rule %s', i)]) - - # Output the three types of messages - show_messages(pkginfo["name"], 'E', rule.errors) - show_messages(pkginfo["name"], 'W', rule.warnings) + with xtarfile.open(package, "r") as pkgtar: + if '.PKGINFO' not in pkgtar.getnames(): + print("Error: %s is not a valid package" % package) + return 1 + + pkginfo = Namcap.package.load_from_tarball(package) + # Loop through each one, load them apply if possible + for i in modules: + rule = get_modules()[i]() + + if isinstance(rule, Namcap.ruleclass.PkgInfoRule): + rule.analyze(pkginfo, None) + elif isinstance(rule, Namcap.ruleclass.PkgbuildRule): + pass + elif isinstance(rule, Namcap.ruleclass.TarballRule): + rule.analyze(pkginfo, pkgtar) + else: + show_messages(pkginfo["name"], 'E', + [('error-running-rule %s', i)]) + + # Output the three types of messages + show_messages(pkginfo["name"], 'E', rule.errors) + show_messages(pkginfo["name"], 'W', rule.warnings) + if info_reporting: + show_messages(pkginfo["name"], 'I', rule.infos) + + # dependency analysis + errs, warns, infos = Namcap.depends.analyze_depends(pkginfo) + show_messages(pkginfo["name"], 'E', errs) + show_messages(pkginfo["name"], 'W', warns) if info_reporting: - show_messages(pkginfo["name"], 'I', rule.infos) - - # dependency analysis - errs, warns, infos = Namcap.depends.analyze_depends(pkginfo) - show_messages(pkginfo["name"], 'E', errs) - show_messages(pkginfo["name"], 'W', warns) - if info_reporting: - show_messages(pkginfo["name"], 'I', infos) + show_messages(pkginfo["name"], 'I', infos) def process_pkginfo(pkginfo, modules): """Runs namcap checks of a single, non-split PacmanPackage object""" @@ -237,14 +227,45 @@ if len(active_modules) == 0: # Go through each package, get the info, and apply the rules for package in packages: - if not os.access(package, os.R_OK): + pkgpath = pathlib.Path(package) + + if not pkgpath.is_file(): print("Error: Problem reading %s" % package) - usage() +
Re: [arch-projects] [namcap] [PATCH] Add handling for compression into namcap.py
I refactored it and I think I've ironed out all the bugs. Does anyone have any additional ideas or see any glaring problems? diff --git a/namcap b/namcap index ea0bc94..019b077 100755 --- a/namcap +++ b/namcap @@ -1,39 +1,2 @@ #!/bin/bash - -args='' -tmp=$(mktemp -d --tmpdir namcap.XX) -cleanup() { - rm -rf "${tmp}" -} -trap 'cleanup' 0 - -for arg in "${@}"; do - if echo "${arg}" | grep -q -E "^.+\.pkg\.tar\..+$" && [ -f "${arg}" ]; then - - extra_opts='' - case "${arg##*.}" in - gz|z|Z) cmd='gzip' ;; - bz2|bz) cmd='bzip2' ;; - xz) cmd='xz' ;; - lzo)cmd='lzop' ;; - lrz)cmd='lrzip' - extra_opts="-q -o -" ;; - lz4)cmd='lz4' - extra_opts="-q" ;; - lz) cmd='lzip' - extra_opts="-q" ;; - zst)cmd='zstd' - extra_opts="-q" ;; - *) echo 'Unsupported compression'; exit 1;; - esac - - tar="${tmp}/$(basename "${arg%.*}")" - $cmd -dcf $extra_opts "${arg}" > "${tar}" - - args="${args} ${tar}" - else - args="${args} ${arg}" - fi -done - -/usr/bin/env python3 -m namcap ${args} +/usr/bin/env python3 -m namcap ${@} diff --git a/namcap.py b/namcap.py index a7f532a..9a240dc 100755 --- a/namcap.py +++ b/namcap.py @@ -22,7 +22,11 @@ import getopt import os import sys +import subprocess +import tempfile +import pathlib import tarfile +import xtarfile import Namcap.depends import Namcap.tags @@ -49,18 +53,6 @@ def usage(): sys.exit(2) -def open_package(filename): - try: - tar = tarfile.open(filename, "r") - if '.PKGINFO' not in tar.getnames(): - tar.close() - return None - except IOError: - if tar: - tar.close() - return None - return tar - def check_rules_exclude(optlist): '''Check if the -r (--rules) and the -r (--exclude) options are being used at same time''' @@ -76,13 +68,10 @@ def show_messages(name, key, messages): for msg in messages: print("%s %s: %s" % (name, key, Namcap.tags.format_message(msg))) -def process_realpackage(package, modules): +def process_realpackage(pkgtar, package, modules): """Runs namcap checks over a package tarball""" - extracted = 0 - pkgtar = open_package(package) - - if not pkgtar: - print("Error: %s is empty or is not a valid package" % package) + if '.PKGINFO' not in pkgtar.getnames(): + raise TypeError('Error: %s is not a valid package' % package) return 1 pkginfo = Namcap.package.load_from_tarball(package) @@ -98,7 +87,7 @@ def process_realpackage(package, modules): rule.analyze(pkginfo, pkgtar) else: show_messages(pkginfo["name"], 'E', - [('error-running-rule %s', i)]) + [('error-running-rule %s', i)]) # Output the three types of messages show_messages(pkginfo["name"], 'E', rule.errors) @@ -237,15 +226,73 @@ if len(active_modules) == 0: # Go through each package, get the info, and apply the rules for package in packages: - if not os.access(package, os.R_OK): + pkgpath = pathlib.Path(package) + + if not pkgpath.is_file(): print("Error: Problem reading %s" % package) usage() + sys.exit(1) + + if pkgpath.with_suffix('').suffix == '.tar': + # What compression is used? + try: # Try using xtarfile first + with xtarfile.open(package, "r") as pkgtar: + process_realpackage(pkgtar, package, active_modules) + + # If xtarfile can't handle the compression do it with external software + except NotImplementedError: + if pkgpath.suffix == '.lzo': + cmd = 'lzop' + extra_opts = '-c' + elif pkgpath.suffix == '.lrz': + cmd = 'lrzip' + extra_opts = '-o -' + elif pkgpath.suffix == '.lz4': + cmd = 'lz4' + extra_opts = '-c' + elif pkgpath.suffix == '.lz': + cmd = 'lzip' + extra_opts='-c' + else: +
Re: [arch-projects] [namcap] [PATCH] Add handling for compression into namcap.py
I did a quick patch using your idea and it definitely seems like a good way to do it. Can probably clean it up and make it look prettier with some refactoring. I'll look into it later. diff --git a/namcap.py b/namcap.py index a7f532a..0e78b89 100755 --- a/namcap.py +++ b/namcap.py @@ -22,7 +22,10 @@ import getopt import os import sys -import tarfile +import subprocess +import tempfile +import pathlib +import xtarfile import Namcap.depends import Namcap.tags @@ -49,18 +52,6 @@ def usage(): sys.exit(2) -def open_package(filename): - try: - tar = tarfile.open(filename, "r") - if '.PKGINFO' not in tar.getnames(): - tar.close() - return None - except IOError: - if tar: - tar.close() - return None - return tar - def check_rules_exclude(optlist): '''Check if the -r (--rules) and the -r (--exclude) options are being used at same time''' @@ -79,39 +70,38 @@ def show_messages(name, key, messages): def process_realpackage(package, modules): """Runs namcap checks over a package tarball""" extracted = 0 - pkgtar = open_package(package) - - if not pkgtar: - print("Error: %s is empty or is not a valid package" % package) - return 1 - - pkginfo = Namcap.package.load_from_tarball(package) - # Loop through each one, load them apply if possible - for i in modules: - rule = get_modules()[i]() - - if isinstance(rule, Namcap.ruleclass.PkgInfoRule): - rule.analyze(pkginfo, None) - elif isinstance(rule, Namcap.ruleclass.PkgbuildRule): - pass - elif isinstance(rule, Namcap.ruleclass.TarballRule): - rule.analyze(pkginfo, pkgtar) - else: - show_messages(pkginfo["name"], 'E', - [('error-running-rule %s', i)]) - - # Output the three types of messages - show_messages(pkginfo["name"], 'E', rule.errors) - show_messages(pkginfo["name"], 'W', rule.warnings) + with xtarfile.open(package, "r") as pkgtar: + if '.PKGINFO' not in pkgtar.getnames(): + print("Error: %s is not a valid package" % package) + return 1 + + pkginfo = Namcap.package.load_from_tarball(package) + # Loop through each one, load them apply if possible + for i in modules: + rule = get_modules()[i]() + + if isinstance(rule, Namcap.ruleclass.PkgInfoRule): + rule.analyze(pkginfo, None) + elif isinstance(rule, Namcap.ruleclass.PkgbuildRule): + pass + elif isinstance(rule, Namcap.ruleclass.TarballRule): + rule.analyze(pkginfo, pkgtar) + else: + show_messages(pkginfo["name"], 'E', + [('error-running-rule %s', i)]) + + # Output the three types of messages + show_messages(pkginfo["name"], 'E', rule.errors) + show_messages(pkginfo["name"], 'W', rule.warnings) + if info_reporting: + show_messages(pkginfo["name"], 'I', rule.infos) + + # dependency analysis + errs, warns, infos = Namcap.depends.analyze_depends(pkginfo) + show_messages(pkginfo["name"], 'E', errs) + show_messages(pkginfo["name"], 'W', warns) if info_reporting: - show_messages(pkginfo["name"], 'I', rule.infos) - - # dependency analysis - errs, warns, infos = Namcap.depends.analyze_depends(pkginfo) - show_messages(pkginfo["name"], 'E', errs) - show_messages(pkginfo["name"], 'W', warns) - if info_reporting: - show_messages(pkginfo["name"], 'I', infos) + show_messages(pkginfo["name"], 'I', infos) def process_pkginfo(pkginfo, modules): """Runs namcap checks of a single, non-split PacmanPackage object""" @@ -237,14 +227,45 @@ if len(active_modules) == 0: # Go through each package, get the info, and apply the rules for package in packages: - if not os.access(package, os.R_OK): + pkgpath = pathlib.Path(package) + + if not pkgpath.is_file(): print("Error: Problem reading %s" % package) - usage() + parser.print_usage() + continue # Skip to next package if any + + if pkgpath.with_suffix('').suffix == '.tar': + # What compression is used? +
Re: [arch-projects] [namcap] [PATCH] Add handling for compression into namcap.py
On 12/11/20 2:25 PM, meganomic via arch-projects wrote: Adds handling of the compression and temporary storage into namcap.py so it can be removed from the bash script. https://bugs.archlinux.org/task/59844 mentions "use setuptools entry points." instead of the bash script but I don't know how to do that. I just removed it from the bash script for now. The subprocess to decompression tools is still (with your patch) just as annoying to me as the existence of the bash script. Python's builtin tarfile.open can handle gz, bz2, or xz. Using the python-xtarfile module you can use xtarfile.open to also handle zst. We should prefer the native code instead of basically inlining bash into python. True, in order to support lzo, lrz, lz4, lz, we might still need to do subprocess out and create a temporary file with the decompressed package, but this should really be a last resort (and users in practice should rarely hit this code). What do you think about trying xtarfile first? -- Eli Schwartz Bug Wrangler and Trusted User OpenPGP_signature Description: OpenPGP digital signature