commit:     6565ad2eb90fe0840047d097fa5637d176a7d580
Author:     Matt Turner <mattst88 <AT> gentoo <DOT> org>
AuthorDate: Fri Apr 17 03:21:34 2020 +0000
Commit:     Matt Turner <mattst88 <AT> gentoo <DOT> org>
CommitDate: Fri Apr 17 17:20:18 2020 +0000
URL:        https://gitweb.gentoo.org/proj/catalyst.git/commit/?id=6565ad2e

catalyst: Use hashlib instead of external tools

Signed-off-by: Matt Turner <mattst88 <AT> gentoo.org>

 README                     |   1 -
 catalyst/base/genbase.py   |  18 ++++++-
 catalyst/base/stagebase.py |   8 +--
 catalyst/hash_utils.py     | 126 ---------------------------------------------
 catalyst/main.py           |  42 ++-------------
 doc/catalyst-config.5.txt  |   8 +--
 etc/catalyst.conf          |  18 +++----
 7 files changed, 35 insertions(+), 186 deletions(-)

diff --git a/README b/README
index eb75ba67..1a039fca 100644
--- a/README
+++ b/README
@@ -20,7 +20,6 @@ Requirements
 - Python 3.6 or greater
 - An ebuild repository snapshot (or an ebuild tree to create one)
 - A generic stage3 tarball for your architecture
-- shash for digest support
 
 What is catalyst?
 ========================

diff --git a/catalyst/base/genbase.py b/catalyst/base/genbase.py
index 08076460..632ee0d9 100644
--- a/catalyst/base/genbase.py
+++ b/catalyst/base/genbase.py
@@ -1,4 +1,5 @@
 
+import hashlib
 import io
 import os
 
@@ -11,6 +12,20 @@ class GenBase():
     def __init__(self, myspec):
         self.settings = myspec
 
+    @staticmethod
+    def generate_hash(filepath, name):
+        h = hashlib.new(name)
+
+        with open(filepath, 'rb') as f:
+            while True:
+                data = f.read(8192)
+                if not data:
+                    break
+                h.update(data)
+
+        filename = os.path.split(filepath)[1]
+        return f'# {name.upper()} HASH\n{h.hexdigest()}  {filename}\n'
+
     def gen_contents_file(self, path):
         contents = path + ".CONTENTS"
         if os.path.exists(contents):
@@ -29,11 +44,10 @@ class GenBase():
         if os.path.exists(digests):
             os.remove(digests)
         if "digests" in self.settings:
-            hash_map = self.settings["hash_map"]
             if os.path.exists(path):
                 with io.open(digests, "w", encoding='utf-8') as myf:
                     for f in [path, path + '.CONTENTS']:
                         if os.path.exists(f):
                             for i in self.settings["digests"].split():
-                                digest = hash_map.generate_hash(f, hash_=i)
+                                digest = self.generate_hash(f, name=i)
                                 myf.write(digest)

diff --git a/catalyst/base/stagebase.py b/catalyst/base/stagebase.py
index 5f3fa1d0..71bf1ef9 100644
--- a/catalyst/base/stagebase.py
+++ b/catalyst/base/stagebase.py
@@ -442,9 +442,7 @@ class StageBase(TargetBase, ClearBase, GenBase):
                 # XXX: Is this even necessary if the previous check passes?
                 if os.path.exists(self.settings["source_path"]):
                     self.settings["source_path_hash"] = \
-                        self.settings["hash_map"].generate_hash(
-                            self.settings["source_path"],
-                            hash_="sha1")
+                        self.generate_hash(self.settings["source_path"], "sha1")
         log.notice('Source path set to %s', self.settings['source_path'])
 
     def set_dest_path(self):
@@ -469,9 +467,7 @@ class StageBase(TargetBase, ClearBase, GenBase):
         )
         log.info('SNAPSHOT_PATH set to: %s', self.settings['snapshot_path'])
         self.settings["snapshot_path_hash"] = \
-            self.settings["hash_map"].generate_hash(
-                self.settings["snapshot_path"],
-                hash_="sha1")
+            self.generate_hash(self.settings["snapshot_path"], "sha1")
 
     def set_snapcache_path(self):
         self.settings["snapshot_cache_path"] = \

diff --git a/catalyst/hash_utils.py b/catalyst/hash_utils.py
deleted file mode 100644
index 3aae890e..00000000
--- a/catalyst/hash_utils.py
+++ /dev/null
@@ -1,126 +0,0 @@
-
-import os
-from collections import namedtuple
-from subprocess import Popen, PIPE
-
-from catalyst import log
-from catalyst.support import CatalystError
-
-
-# Use HashMap.fields for the value legend
-# fields = ["func", "cmd", "args", "id"]
-HASH_DEFINITIONS = {
-    "adler32"  :["calc_hash2", "shash", ["-a", "ADLER32"], "ADLER32"],
-    "blake2"   :["calc_hash",  "b2sum", [ ], "BLAKE2"],
-    "crc32"    :["calc_hash2", "shash", ["-a", "CRC32"], "CRC32"],
-    "crc32b"   :["calc_hash2", "shash", ["-a", "CRC32B"], "CRC32B"],
-    "gost"     :["calc_hash2", "shash", ["-a", "GOST"], "GOST"],
-    "haval128" :["calc_hash2", "shash", ["-a", "HAVAL128"], "HAVAL128"],
-    "haval160" :["calc_hash2", "shash", ["-a", "HAVAL160"], "HAVAL160"],
-    "haval192" :["calc_hash2", "shash", ["-a", "HAVAL192"], "HAVAL192"],
-    "haval224" :["calc_hash2", "shash", ["-a", "HAVAL224"], "HAVAL224"],
-    "haval256" :["calc_hash2", "shash", ["-a", "HAVAL256"], "HAVAL256"],
-    "md2"      :["calc_hash2", "shash", ["-a", "MD2"], "MD2"],
-    "md4"      :["calc_hash2", "shash", ["-a", "MD4"], "MD4"],
-    "md5"      :["calc_hash2", "shash", ["-a", "MD5"], "MD5"],
-    "ripemd128":["calc_hash2", "shash", ["-a", "RIPEMD128"], "RIPEMD128"],
-    "ripemd160":["calc_hash2", "shash", ["-a", "RIPEMD160"], "RIPEMD160"],
-    "ripemd256":["calc_hash2", "shash", ["-a", "RIPEMD256"], "RIPEMD256"],
-    "ripemd320":["calc_hash2", "shash", ["-a", "RIPEMD320"], "RIPEMD320"],
-    "sha1"     :["calc_hash2", "shash", ["-a", "SHA1"], "SHA1"],
-    "sha224"   :["calc_hash2", "shash", ["-a", "SHA224"], "SHA224"],
-    "sha256"   :["calc_hash2", "shash", ["-a", "SHA256"], "SHA256"],
-    "sha384"   :["calc_hash2", "shash", ["-a", "SHA384"], "SHA384"],
-    "sha512"   :["calc_hash2", "shash", ["-a", "SHA512"], "SHA512"],
-    "snefru128":["calc_hash2", "shash", ["-a", "SNEFRU128"], "SNEFRU128"],
-    "snefru256":["calc_hash2", "shash", ["-a", "SNEFRU256"], "SNEFRU256"],
-    "tiger"    :["calc_hash2", "shash", ["-a", "TIGER"], "TIGER"],
-    "tiger128" :["calc_hash2", "shash", ["-a", "TIGER128"], "TIGER128"],
-    "tiger160" :["calc_hash2", "shash", ["-a", "TIGER160"], "TIGER160"],
-    "whirlpool":["calc_hash2", "shash", ["-a", "WHIRLPOOL"], "WHIRLPOOL"],
-}
-
-
-class HashMap():
-    '''Class for handling
-    Catalyst's hash generation'''
-
-    fields = ["func", "cmd", "args", "id"]
-
-    def __init__(self, hashes=None):
-        '''Class init
-
-        @param hashes: dictionary of Key:[function, cmd, cmd_args, Print string]
-        @param fields: list of ordered field names for the hashes
-                eg: ["func", "cmd", "args", "id"]
-        '''
-        if hashes is None:
-            hashes = {}
-        self.hash_map = {}
-
-        # create the hash definition namedtuple classes
-        for name in list(hashes):
-            obj = namedtuple(name, self.fields)
-            obj.__slots__ = ()
-            self.hash_map[name] = obj._make(hashes[name])
-        del obj
-
-    def generate_hash(self, file_, hash_="crc32"):
-        '''Prefered method of generating a hash for the passed in file_
-
-        @param file_: the file to generate the hash for
-        @param hash_: the hash algorythm to use
-        @returns the hash result
-        '''
-        try:
-            return getattr(self, self.hash_map[hash_].func)(
-                file_,
-                hash_)
-        except:
-            raise CatalystError("Error generating hash, is appropriate " +
-                                "utility installed on your system?", 
print_traceback=True)
-
-    def calc_hash(self, file_, hash_):
-        '''
-        Calculate the hash for "file_"
-
-        @param file_: the file to generate the hash for
-        @param hash_: the hash algorythm to use
-        @returns the hash result
-        '''
-        _hash = self.hash_map[hash_]
-        args = [_hash.cmd]
-        args.extend(_hash.args)
-        args.append(file_)
-        log.debug('args = %r', args)
-        source = Popen(args, stdout=PIPE)
-        output = source.communicate()
-        mylines = output[0].decode('ascii')
-        log.info('%s (%s) = %s', _hash.id, file_, mylines)
-        result = "# " + _hash.id + " (b2sum) HASH\n" + mylines
-        return result
-
-    def calc_hash2(self, file_, hash_type):
-        '''
-        Calculate the hash for "file_"
-
-        @param file_: the file to generate the hash for
-        @param hash_: the hash algorythm to use
-        @returns the hash result
-        '''
-        _hash = self.hash_map[hash_type]
-        args = [_hash.cmd]
-        args.extend(_hash.args)
-        args.append(file_)
-        log.debug('args = %r', args)
-        source = Popen(args, stdout=PIPE)
-        output = source.communicate()
-        lines = output[0].decode('ascii').split('\n')
-        log.debug('output = %s', output)
-        header = lines[0]
-        h_f = lines[1].split()
-        hash_result = h_f[0]
-        short_file = os.path.split(h_f[1])[1]
-        result = header + "\n" + hash_result + "  " + short_file + "\n"
-        log.info('%s (%s) = %s', header, short_file, result)
-        return result

diff --git a/catalyst/main.py b/catalyst/main.py
index cb3cd3f7..d8e791c4 100644
--- a/catalyst/main.py
+++ b/catalyst/main.py
@@ -1,5 +1,6 @@
 import argparse
 import datetime
+import hashlib
 import os
 import sys
 
@@ -13,7 +14,6 @@ from DeComp.contents import ContentsMap
 from catalyst import log
 import catalyst.config
 from catalyst.defaults import confdefaults, option_messages, DEFAULT_CONFIG_FILE
-from catalyst.hash_utils import HashMap, HASH_DEFINITIONS
 from catalyst.support import CatalystError
 from catalyst.version import get_version
 
@@ -335,55 +335,23 @@ def _main(parser, opts):
                                list_xattrs_opt=conf_values['list_xattrs_opt'])
     conf_values["contents_map"] = contents_map
 
-    # initialze our hash and contents generators
-    hash_map = HashMap(HASH_DEFINITIONS)
-    conf_values["hash_map"] = hash_map
-
     # initialize our (de)compression definitions
     conf_values['decompress_definitions'] = DECOMPRESS_DEFINITIONS
     conf_values['compress_definitions'] = COMPRESS_DEFINITIONS
     # TODO add capability to config/spec new definitions
 
-    # Start checking that digests are valid now that hash_map is initialized
     if "digests" in conf_values:
         digests = set(conf_values['digests'].split())
-        valid_digests = set(HASH_DEFINITIONS.keys())
-
-        # Use the magic keyword "auto" to use all algos that are available.
-        skip_missing = False
-        if 'auto' in digests:
-            skip_missing = True
-            digests.remove('auto')
-            if not digests:
-                digests = set(valid_digests)
+        valid_digests = hashlib.algorithms_available
 
         # First validate all the requested digests are valid keys.
         if digests - valid_digests:
             log.critical(
-                'These are not valid digest entries:\n'
-                '%s\n'
-                'Valid digest entries:\n'
-                '%s',
-                ', '.join(digests - valid_digests),
+                'These are not valid digest entries:\n%s\n'
+                'Valid digest entries:\n%s',
+                ', '.join(sorted(digests - valid_digests)),
                 ', '.join(sorted(valid_digests)))
 
-        # Then check for any programs that the hash func requires.
-        for digest in digests:
-            try:
-                process.find_binary(hash_map.hash_map[digest].cmd)
-            except process.CommandNotFound:
-                # In auto mode, just ignore missing support.
-                if skip_missing:
-                    digests.remove(digest)
-                    continue
-                log.critical(
-                    'The "%s" binary needed by digest "%s" was not found. '
-                    'It needs to be in your system path.',
-                    hash_map.hash_map[digest].cmd, digest)
-
-        # Now reload the config with our updated value.
-        conf_values['digests'] = ' '.join(digests)
-
     addlargs = {}
 
     if myspecfile:

diff --git a/doc/catalyst-config.5.txt b/doc/catalyst-config.5.txt
index d5e8c128..14b7dd5a 100644
--- a/doc/catalyst-config.5.txt
+++ b/doc/catalyst-config.5.txt
@@ -181,10 +181,10 @@ much RAM everything will fail horribly and it is not our fault.
 
 SUPPORTED HASHES
 ----------------
-Supported hashes: adler32, crc32, crc32b, gost, haval128, haval160,
-haval192, haval224, haval256, md2, md4, md5, ripemd128, ripemd160,
-ripemd256, ripemd320, sha1, sha224, sha256, sha384, sha512, snefru128,
-snefru256, tiger, tiger128, tiger160, whirlpool.
+The list of supported hashes is dependent on the version of Python. To
+see the list of hashes supported by the version of Python in use, run
+
+    $ python3 -c 'import hashlib; print(hashlib.algorithms_available)'
 
 
 BINARY PACKAGE DEPENDENCIES

diff --git a/etc/catalyst.conf b/etc/catalyst.conf
index 4c4d491e..2e511cce 100644
--- a/etc/catalyst.conf
+++ b/etc/catalyst.conf
@@ -3,16 +3,14 @@
 # Simple descriptions of catalyst settings. Please refer to the online
 # documentation for more information.
 
-# Creates a .DIGESTS file containing the hash output from any of the supported
-# options below.  Adding them all may take a long time on slower systems.  The
-# special "auto" keyword will skip digests that the system does not support,
-# and if it's the only keyword given, will default to enabling all digests.
-# Supported hashes:
-# adler32, blake2, crc32, crc32b, gost, haval128, haval160, haval192, haval224,
-# haval256, md2, md4, md5, ripemd128, ripemd160, ripemd256, ripemd320, sha1,
-# sha224, sha256, sha384, sha512, snefru128, snefru256, tiger, tiger128,
-# tiger160, whirlpool
-digests="blake2 sha512"
+# Creates a .DIGESTS file containing the hash output from each of the selected
+# hashes.
+#
+# To see a list of supported hashes, run
+#
+#     $ python3 -c 'import hashlib; print(hashlib.algorithms_available)'
+#
+digests="blake2b sha512"
 
 # distdir specifies where your distfiles are located. This setting should
 # work fine for most default installations.

Reply via email to