commit: 6565ad2eb90fe0840047d097fa5637d176a7d580 Author: Matt Turner <mattst88 <AT> gentoo <DOT> org> AuthorDate: Fri Apr 17 03:21:34 2020 +0000 Commit: Matt Turner <mattst88 <AT> gentoo <DOT> org> CommitDate: Fri Apr 17 17:20:18 2020 +0000 URL: https://gitweb.gentoo.org/proj/catalyst.git/commit/?id=6565ad2e
catalyst: Use hashlib instead of external tools Signed-off-by: Matt Turner <mattst88 <AT> gentoo.org> README | 1 - catalyst/base/genbase.py | 18 ++++++- catalyst/base/stagebase.py | 8 +-- catalyst/hash_utils.py | 126 --------------------------------------------- catalyst/main.py | 42 ++------------- doc/catalyst-config.5.txt | 8 +-- etc/catalyst.conf | 18 +++---- 7 files changed, 35 insertions(+), 186 deletions(-) diff --git a/README b/README index eb75ba67..1a039fca 100644 --- a/README +++ b/README @@ -20,7 +20,6 @@ Requirements - Python 3.6 or greater - An ebuild repository snapshot (or an ebuild tree to create one) - A generic stage3 tarball for your architecture -- shash for digest support What is catalyst? ======================== diff --git a/catalyst/base/genbase.py b/catalyst/base/genbase.py index 08076460..632ee0d9 100644 --- a/catalyst/base/genbase.py +++ b/catalyst/base/genbase.py @@ -1,4 +1,5 @@ +import hashlib import io import os @@ -11,6 +12,20 @@ class GenBase(): def __init__(self, myspec): self.settings = myspec + @staticmethod + def generate_hash(filepath, name): + h = hashlib.new(name) + + with open(filepath, 'rb') as f: + while True: + data = f.read(8192) + if not data: + break + h.update(data) + + filename = os.path.split(filepath)[1] + return f'# {name.upper()} HASH\n{h.hexdigest()} {filename}\n' + def gen_contents_file(self, path): contents = path + ".CONTENTS" if os.path.exists(contents): @@ -29,11 +44,10 @@ class GenBase(): if os.path.exists(digests): os.remove(digests) if "digests" in self.settings: - hash_map = self.settings["hash_map"] if os.path.exists(path): with io.open(digests, "w", encoding='utf-8') as myf: for f in [path, path + '.CONTENTS']: if os.path.exists(f): for i in self.settings["digests"].split(): - digest = hash_map.generate_hash(f, hash_=i) + digest = self.generate_hash(f, name=i) myf.write(digest) diff --git a/catalyst/base/stagebase.py b/catalyst/base/stagebase.py index 5f3fa1d0..71bf1ef9 100644 ---
a/catalyst/base/stagebase.py +++ b/catalyst/base/stagebase.py @@ -442,9 +442,7 @@ class StageBase(TargetBase, ClearBase, GenBase): # XXX: Is this even necessary if the previous check passes? if os.path.exists(self.settings["source_path"]): self.settings["source_path_hash"] = \ - self.settings["hash_map"].generate_hash( - self.settings["source_path"], - hash_="sha1") + self.generate_hash(self.settings["source_path"], "sha1") log.notice('Source path set to %s', self.settings['source_path']) def set_dest_path(self): @@ -469,9 +467,7 @@ class StageBase(TargetBase, ClearBase, GenBase): ) log.info('SNAPSHOT_PATH set to: %s', self.settings['snapshot_path']) self.settings["snapshot_path_hash"] = \ - self.settings["hash_map"].generate_hash( - self.settings["snapshot_path"], - hash_="sha1") + self.generate_hash(self.settings["snapshot_path"], "sha1") def set_snapcache_path(self): self.settings["snapshot_cache_path"] = \ diff --git a/catalyst/hash_utils.py b/catalyst/hash_utils.py deleted file mode 100644 index 3aae890e..00000000 --- a/catalyst/hash_utils.py +++ /dev/null @@ -1,126 +0,0 @@ - -import os -from collections import namedtuple -from subprocess import Popen, PIPE - -from catalyst import log -from catalyst.support import CatalystError - - -# Use HashMap.fields for the value legend -# fields = ["func", "cmd", "args", "id"] -HASH_DEFINITIONS = { - "adler32" :["calc_hash2", "shash", ["-a", "ADLER32"], "ADLER32"], - "blake2" :["calc_hash", "b2sum", [ ], "BLAKE2"], - "crc32" :["calc_hash2", "shash", ["-a", "CRC32"], "CRC32"], - "crc32b" :["calc_hash2", "shash", ["-a", "CRC32B"], "CRC32B"], - "gost" :["calc_hash2", "shash", ["-a", "GOST"], "GOST"], - "haval128" :["calc_hash2", "shash", ["-a", "HAVAL128"], "HAVAL128"], - "haval160" :["calc_hash2", "shash", ["-a", "HAVAL160"], "HAVAL160"], - "haval192" :["calc_hash2", "shash", ["-a", "HAVAL192"], "HAVAL192"], - "haval224" :["calc_hash2", "shash", ["-a", "HAVAL224"], "HAVAL224"], - "haval256" :["calc_hash2", "shash", ["-a", 
"HAVAL256"], "HAVAL256"], - "md2" :["calc_hash2", "shash", ["-a", "MD2"], "MD2"], - "md4" :["calc_hash2", "shash", ["-a", "MD4"], "MD4"], - "md5" :["calc_hash2", "shash", ["-a", "MD5"], "MD5"], - "ripemd128":["calc_hash2", "shash", ["-a", "RIPEMD128"], "RIPEMD128"], - "ripemd160":["calc_hash2", "shash", ["-a", "RIPEMD160"], "RIPEMD160"], - "ripemd256":["calc_hash2", "shash", ["-a", "RIPEMD256"], "RIPEMD256"], - "ripemd320":["calc_hash2", "shash", ["-a", "RIPEMD320"], "RIPEMD320"], - "sha1" :["calc_hash2", "shash", ["-a", "SHA1"], "SHA1"], - "sha224" :["calc_hash2", "shash", ["-a", "SHA224"], "SHA224"], - "sha256" :["calc_hash2", "shash", ["-a", "SHA256"], "SHA256"], - "sha384" :["calc_hash2", "shash", ["-a", "SHA384"], "SHA384"], - "sha512" :["calc_hash2", "shash", ["-a", "SHA512"], "SHA512"], - "snefru128":["calc_hash2", "shash", ["-a", "SNEFRU128"], "SNEFRU128"], - "snefru256":["calc_hash2", "shash", ["-a", "SNEFRU256"], "SNEFRU256"], - "tiger" :["calc_hash2", "shash", ["-a", "TIGER"], "TIGER"], - "tiger128" :["calc_hash2", "shash", ["-a", "TIGER128"], "TIGER128"], - "tiger160" :["calc_hash2", "shash", ["-a", "TIGER160"], "TIGER160"], - "whirlpool":["calc_hash2", "shash", ["-a", "WHIRLPOOL"], "WHIRLPOOL"], -} - - -class HashMap(): - '''Class for handling - Catalyst's hash generation''' - - fields = ["func", "cmd", "args", "id"] - - def __init__(self, hashes=None): - '''Class init - - @param hashes: dictionary of Key:[function, cmd, cmd_args, Print string] - @param fields: list of ordered field names for the hashes - eg: ["func", "cmd", "args", "id"] - ''' - if hashes is None: - hashes = {} - self.hash_map = {} - - # create the hash definition namedtuple classes - for name in list(hashes): - obj = namedtuple(name, self.fields) - obj.__slots__ = () - self.hash_map[name] = obj._make(hashes[name]) - del obj - - def generate_hash(self, file_, hash_="crc32"): - '''Prefered method of generating a hash for the passed in file_ - - @param file_: the file to generate the 
hash for - @param hash_: the hash algorythm to use - @returns the hash result - ''' - try: - return getattr(self, self.hash_map[hash_].func)( - file_, - hash_) - except: - raise CatalystError("Error generating hash, is appropriate " + - "utility installed on your system?", print_traceback=True) - - def calc_hash(self, file_, hash_): - ''' - Calculate the hash for "file_" - - @param file_: the file to generate the hash for - @param hash_: the hash algorythm to use - @returns the hash result - ''' - _hash = self.hash_map[hash_] - args = [_hash.cmd] - args.extend(_hash.args) - args.append(file_) - log.debug('args = %r', args) - source = Popen(args, stdout=PIPE) - output = source.communicate() - mylines = output[0].decode('ascii') - log.info('%s (%s) = %s', _hash.id, file_, mylines) - result = "# " + _hash.id + " (b2sum) HASH\n" + mylines - return result - - def calc_hash2(self, file_, hash_type): - ''' - Calculate the hash for "file_" - - @param file_: the file to generate the hash for - @param hash_: the hash algorythm to use - @returns the hash result - ''' - _hash = self.hash_map[hash_type] - args = [_hash.cmd] - args.extend(_hash.args) - args.append(file_) - log.debug('args = %r', args) - source = Popen(args, stdout=PIPE) - output = source.communicate() - lines = output[0].decode('ascii').split('\n') - log.debug('output = %s', output) - header = lines[0] - h_f = lines[1].split() - hash_result = h_f[0] - short_file = os.path.split(h_f[1])[1] - result = header + "\n" + hash_result + " " + short_file + "\n" - log.info('%s (%s) = %s', header, short_file, result) - return result diff --git a/catalyst/main.py b/catalyst/main.py index cb3cd3f7..d8e791c4 100644 --- a/catalyst/main.py +++ b/catalyst/main.py @@ -1,5 +1,6 @@ import argparse import datetime +import hashlib import os import sys @@ -13,7 +14,6 @@ from DeComp.contents import ContentsMap from catalyst import log import catalyst.config from catalyst.defaults import confdefaults, option_messages, 
DEFAULT_CONFIG_FILE -from catalyst.hash_utils import HashMap, HASH_DEFINITIONS from catalyst.support import CatalystError from catalyst.version import get_version @@ -335,55 +335,23 @@ def _main(parser, opts): list_xattrs_opt=conf_values['list_xattrs_opt']) conf_values["contents_map"] = contents_map - # initialze our hash and contents generators - hash_map = HashMap(HASH_DEFINITIONS) - conf_values["hash_map"] = hash_map - # initialize our (de)compression definitions conf_values['decompress_definitions'] = DECOMPRESS_DEFINITIONS conf_values['compress_definitions'] = COMPRESS_DEFINITIONS # TODO add capability to config/spec new definitions - # Start checking that digests are valid now that hash_map is initialized if "digests" in conf_values: digests = set(conf_values['digests'].split()) - valid_digests = set(HASH_DEFINITIONS.keys()) - - # Use the magic keyword "auto" to use all algos that are available. - skip_missing = False - if 'auto' in digests: - skip_missing = True - digests.remove('auto') - if not digests: - digests = set(valid_digests) + valid_digests = hashlib.algorithms_available # First validate all the requested digests are valid keys. if digests - valid_digests: log.critical( - 'These are not valid digest entries:\n' - '%s\n' - 'Valid digest entries:\n' - '%s', - ', '.join(digests - valid_digests), + 'These are not valid digest entries:\n%s\n' + 'Valid digest entries:\n%s', + ', '.join(sorted(digests - valid_digests)), ', '.join(sorted(valid_digests))) - # Then check for any programs that the hash func requires. - for digest in digests: - try: - process.find_binary(hash_map.hash_map[digest].cmd) - except process.CommandNotFound: - # In auto mode, just ignore missing support. - if skip_missing: - digests.remove(digest) - continue - log.critical( - 'The "%s" binary needed by digest "%s" was not found. ' - 'It needs to be in your system path.', - hash_map.hash_map[digest].cmd, digest) - - # Now reload the config with our updated value. 
- conf_values['digests'] = ' '.join(digests) - addlargs = {} if myspecfile: diff --git a/doc/catalyst-config.5.txt b/doc/catalyst-config.5.txt index d5e8c128..14b7dd5a 100644 --- a/doc/catalyst-config.5.txt +++ b/doc/catalyst-config.5.txt @@ -181,10 +181,10 @@ much RAM everything will fail horribly and it is not our fault. SUPPORTED HASHES ---------------- -Supported hashes: adler32, crc32, crc32b, gost, haval128, haval160, -haval192, haval224, haval256, md2, md4, md5, ripemd128, ripemd160, -ripemd256, ripemd320, sha1, sha224, sha256, sha384, sha512, snefru128, -snefru256, tiger, tiger128, tiger160, whirlpool. +The list of supported hashes is dependent on the version of Python. To +see the list of hashes supported by the version of Python in use, run + + $ python3 -c 'import hashlib; print(hashlib.algorithms_available)' BINARY PACKAGE DEPENDENCIES diff --git a/etc/catalyst.conf b/etc/catalyst.conf index 4c4d491e..2e511cce 100644 --- a/etc/catalyst.conf +++ b/etc/catalyst.conf @@ -3,16 +3,14 @@ # Simple descriptions of catalyst settings. Please refer to the online # documentation for more information. -# Creates a .DIGESTS file containing the hash output from any of the supported -# options below. Adding them all may take a long time on slower systems. The -# special "auto" keyword will skip digests that the system does not support, -# and if it's the only keyword given, will default to enabling all digests. -# Supported hashes: -# adler32, blake2, crc32, crc32b, gost, haval128, haval160, haval192, haval224, -# haval256, md2, md4, md5, ripemd128, ripemd160, ripemd256, ripemd320, sha1, -# sha224, sha256, sha384, sha512, snefru128, snefru256, tiger, tiger128, -# tiger160, whirlpool -digests="blake2 sha512" +# Creates a .DIGESTS file containing the hash output from each of the selected +# hashes. 
+# +# To see a list of supported hashes, run +# +# $ python3 -c 'import hashlib; print(hashlib.algorithms_available)' +# +digests="blake2b sha512" # distdir specifies where your distfiles are located. This setting should # work fine for most default installations.