This is an automated email from the git hooks/post-receive script. smcv pushed a commit to branch master in repository game-data-packager.
commit f7915ca5c14feeab88bfcb759a98738afa83760c Author: Simon McVittie <[email protected]> Date: Tue Dec 22 16:22:14 2015 +0000 Unify code to stream members from a TarFile or ZipFile --- game_data_packager/build.py | 121 ++++----------- game_data_packager/unpack/__init__.py | 282 ++++++++++++++++++++++++++++++++++ game_data_packager/unpack/__main__.py | 41 +++++ 3 files changed, 357 insertions(+), 87 deletions(-) diff --git a/game_data_packager/build.py b/game_data_packager/build.py index 535584a..548d209 100644 --- a/game_data_packager/build.py +++ b/game_data_packager/build.py @@ -41,6 +41,7 @@ except ImportError: from .gog import GOG from .paths import (DATADIR, ETCDIR) +from .unpack import (TarUnpacker, ZipUnpacker) from .util import (AGENT, MEBIBYTE, PACKAGE_CACHE, @@ -616,9 +617,9 @@ class PackagingTask(object): elif extension.lower() == '.deb' and which('dpkg-deb'): with subprocess.Popen(['dpkg-deb', '--fsys-tarfile', path], stdout=subprocess.PIPE) as fsys_process: - with tarfile.open(path + '//data.tar.*', mode='r|', - fileobj=fsys_process.stdout) as tar: - self.consider_tar_stream(path, tar) + with TarUnpacker(path + '//data.tar.*', + reader=fsys_process.stdout, compression='') as tar: + self.consider_stream(path, tar) def _log_not_any_of(self, path, size, hashes, why, candidates): message = ('found %s but it is not one of the expected ' + @@ -723,7 +724,7 @@ class PackagingTask(object): logger.debug('%s: %s', package.name, result) return result - def consider_zip(self, name, zf, provider=None): + def consider_stream(self, name, unpacker, provider=None): if provider is None: try_to_unpack = self.game.files should_provide = set() @@ -733,8 +734,8 @@ class PackagingTask(object): should_provide = set(try_to_unpack) distinctive_dirs = provider.unpack.get('distinctive_dirs', True) - for entry in zf.infolist(): - if not entry.file_size and entry.filename.endswith('/'): + for entry in unpacker: + if not entry.is_extractable or not entry.is_regular_file: continue for filename in try_to_unpack: @@ -746,10 +747,10 @@ class PackagingTask(object): if wanted.alternatives: continue - if wanted.size is not None and wanted.size != entry.file_size: + if wanted.size not in (None, entry.size): continue - match_path = '/' + entry.filename.lower() + match_path = '/' + entry.name.lower() for lf in wanted.look_for: if not distinctive_dirs: @@ -760,7 +761,7 @@ class PackagingTask(object): if filename in self.found: continue - entryfile = zf.open(entry) + entryfile = unpacker.open(entry) tmp = os.path.join(self.get_workdir(), 'tmp', wanted.name) @@ -768,77 +769,28 @@ class PackagingTask(object): mkdir_p(tmpdir) wf = open(tmp, 'wb') - if entry.file_size > QUITE_LARGE: - logger.info('extracting %s from %s', entry.filename, name) - else: - logger.debug('extracting %s from %s', entry.filename, name) - hf = HashedFile.from_file( - name + '//' + entry.filename, entryfile, wf, - size=entry.file_size, - progress=(entry.file_size > QUITE_LARGE)) - wf.close() - orig_time = time.mktime(entry.date_time + (0, 0, -1)) - os.utime(tmp, (orig_time, orig_time)) - - if not self.use_file(wanted, tmp, hf): - os.remove(tmp) - - if should_provide: - for missing in sorted(should_provide): - logger.error('%s should have provided %s but did not', - name, missing) - - def consider_tar_stream(self, name, tar, provider=None): - if provider is None: - try_to_unpack = self.game.files - should_provide = set() - else: - try_to_unpack = set(f.name for f in provider.provides_files) - should_provide = set(try_to_unpack) - - for entry in tar: - if not entry.isfile(): - continue - - for filename in try_to_unpack: - wanted = self.game.files.get(filename) - - if wanted is None: - continue - if wanted.alternatives: - continue - - if wanted.size is not None and wanted.size != entry.size: - continue - - match_path = '/' + entry.name.lower() - - for lf in wanted.look_for: - if match_path.endswith('/' + lf): - should_provide.discard(filename) - - if filename in self.found: - continue - - entryfile = tar.extractfile(entry) - - tmp = os.path.join(self.get_workdir(), - 'tmp', wanted.name) - tmpdir = os.path.dirname(tmp) - mkdir_p(tmpdir) - - wf = open(tmp, 'wb') - if entry.size > QUITE_LARGE: + if entry.size is not None and entry.size > QUITE_LARGE: + large = True logger.info('extracting %s from %s', entry.name, name) else: + large = False logger.debug('extracting %s from %s', entry.name, name) hf = HashedFile.from_file( name + '//' + entry.name, entryfile, wf, - size=entry.size, - progress=(entry.size > QUITE_LARGE)) + size=entry.size, progress=large) wf.close() - os.utime(tmp, (entry.mtime, entry.mtime)) + + if entry.mtime is not None: + orig_time = entry.mtime + elif provider is not None: + orig_name = self.found[provider.name] + orig_time = os.stat(orig_name).st_mtime + else: + orig_time = None + + if orig_time is not None: + os.utime(tmp, (orig_time, orig_time)) if not self.use_file(wanted, tmp, hf): os.remove(tmp) @@ -1071,26 +1023,21 @@ class PackagingTask(object): os.utime(tmp, (orig_time, orig_time)) self.use_file(wanted, tmp, None) elif fmt in ('tar.gz', 'tar.bz2', 'tar.xz'): - rf = open(found_name, 'rb') - if 'skip' in provider.unpack: - skipped = rf.read(provider.unpack['skip']) - assert len(skipped) == provider.unpack['skip'] - with tarfile.open( - found_name, - mode='r|' + fmt[4:], - fileobj=rf) as tar: - self.consider_tar_stream(found_name, tar, provider) + reader = open(found_name, 'rb') + with TarUnpacker(found_name, reader, compression=fmt[4:], + skip=provider.unpack.get('skip', 0)) as tar: + self.consider_stream(found_name, tar, provider) elif fmt == 'deb': with subprocess.Popen(['dpkg-deb', '--fsys-tarfile', found_name], stdout=subprocess.PIPE) as fsys_process: - with tarfile.open(found_name + '//data.tar.*', mode='r|', - fileobj=fsys_process.stdout) as tar: - self.consider_tar_stream(found_name, tar, provider) + with TarUnpacker(found_name + '//data.tar.*', + fsys_process.stdout, compression='') as tar: + self.consider_stream(found_name, tar, provider) elif fmt == 'zip': if provider.name.startswith('gog_'): package.used_sources.add(provider.name) - with zipfile.ZipFile(found_name, 'r') as zf: - self.consider_zip(found_name, zf, provider) + with ZipUnpacker(found_name) as unpacker: + self.consider_stream(found_name, unpacker, provider) elif fmt == 'lha': to_unpack = provider.unpack.get('unpack', [f.name for f in provider.provides_files]) diff --git a/game_data_packager/unpack/__init__.py b/game_data_packager/unpack/__init__.py new file mode 100644 index 0000000..59373a6 --- /dev/null +++ b/game_data_packager/unpack/__init__.py @@ -0,0 +1,282 @@ +#!/usr/bin/python3 +# encoding=utf-8 +# +# Copyright © 2014-2015 Simon McVittie <[email protected]> +# Copyright © 2015 Alexandre Detiste <[email protected]> +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# You can find the GPL license text on a Debian system under +# /usr/share/common-licenses/GPL-2. + +from abc import (ABCMeta, abstractmethod) +import errno +import os +import shlex +import shutil +import tarfile +import time +import zipfile + +class UnpackableEntry(metaclass=ABCMeta): + """An entry in a StreamUnpackable. + """ + @property + @abstractmethod + def is_directory(self): + raise NotImplementedError + + @property + @abstractmethod + def is_regular_file(self): + """True if the entry is a regular file. False if it is a + directory, symlink, or some special thing like an instruction + to patch some other file. + """ + raise NotImplementedError + + @property + def is_extractable(self): + """True if the entry is something that we can extract. + + The default implementation is that we can extract regular files. + """ + return self.is_regular_file + + @property + def mtime(self): + """The last-modification time, or None if unspecified.""" + return None + + @property + @abstractmethod + def name(self): + """The absolute or relative filename, with Unix path separators.""" + raise NotImplementedError + + @property + @abstractmethod + def size(self): + """The size in bytes.""" + raise NotImplementedError + + @property + def type_indicator(self): + """One or more ASCII symbols indicating the file type.""" + if self.is_directory: + ret = 'd' + elif self.is_regular_file: + ret = '-' + else: + ret = '?' + + if self.is_extractable: + ret += 'r' + else: + ret += '-' + + return ret + +class StreamUnpackable(metaclass=ABCMeta): + """An archive in which entries can be inspected and extracted + by iteration. + """ + + @abstractmethod + def __iter__(self): + """Iterate through UnpackableEntry objects.""" + raise NotImplementedError + + @abstractmethod + def open(self, member): + """Open a binary file-like entry for the name or entry. + """ + raise NotImplementedError + + def extract(self, member, path=None): + """Extract the given member from the archive into the given + directory. + """ + with self.open(member) as reader: + filename = reader.entry.name + filename = filename.lstrip('/') + + while filename.startswith('../'): + filename = filename[3:] + filename = filename.replace('/../', '/') + if filename.endswith('/..'): + filename = filename[:-3] + if filename.endswith('/'): + filename = filename[:-1] + if path is None: + path = '.' + + dest = os.path.join(path, filename) + os.makedirs(os.path.dirname(dest), exist_ok=True) + + try: + os.remove(dest) + except OSError as e: + if e.errno != errno.ENOENT: + raise + + with open(dest, 'xb') as writer: + shutil.copyfileobj(reader, writer) + + def extractall(self, path, members=None): + for entry in self: + if entry.is_extractable: + if members is None or entry.name in members: + self.extract(entry, path) + + def printdir(self): + for entry in self: + if entry.size is None: + size = '?' * 9 + else: + size = '%9s' % entry.size + + if entry.mtime is not None: + mtime = time.strftime('%Y-%m-%d %H:%M:%S', + time.gmtime(entry.mtime)) + else: + mtime = '????-??-?? ??:??:??' + + print('%s %s %s %s' % (entry.type_indicator, size, mtime, + shlex.quote(entry.name))) + +class WrapperUnpacker(StreamUnpackable): + """Base class for a StreamUnpackable that wraps a TarFile-like object.""" + + def __init__(self): + self._impl = None + + @abstractmethod + def _wrap_entry(self, entry): + raise NotImplementedError + + @abstractmethod + def _is_entry(self, entry): + raise NotImplementedError + + def __enter__(self): + return self + + def __exit__(self, ex_type, ex_value, ex_traceback): + if self._impl is not None: + self._impl.close() + self._impl = None + + def __iter__(self): + for entry in self._impl: + yield self._wrap_entry(entry) + + def open(self, entry): + assert self._is_entry(entry) + return self._impl.open(entry.impl) + +class TarEntry(UnpackableEntry): + __slots__ = 'impl' + + def __init__(self, impl): + self.impl = impl + + @property + def is_extractable(self): + return True + + @property + def is_directory(self): + return self.impl.isdir() + + @property + def is_regular_file(self): + return self.impl.isfile() + + @property + def mtime(self): + return self.impl.mtime + + @property + def name(self): + return self.impl.name + + @property + def size(self): + return self.impl.size + +class TarUnpacker(WrapperUnpacker): + def __init__(self, name, reader=None, compression='*', skip=0): + super(TarUnpacker, self).__init__() + + if reader is None: + reader = open(name, 'rb') + + if skip: + discard = reader.read(skip) + assert len(discard) == skip + + self._impl = tarfile.open(name, mode='r|' + compression, + fileobj=reader) + + def open(self, entry): + assert isinstance(entry, TarEntry) + return self._impl.extractfile(entry.impl) + + def _is_entry(self, entry): + return isinstance(entry, TarEntry) + + def _wrap_entry(self, entry): + return TarEntry(entry) + +class ZipEntry(UnpackableEntry): + __slots__ = 'impl' + + def __init__(self, impl): + self.impl = impl + + @property + def is_extractable(self): + return True + + @property + def is_directory(self): + return self.name.endswith('/') + + @property + def is_regular_file(self): + return not self.name.endswith('/') + + @property + def mtime(self): + return time.mktime(self.impl.date_time + (0, 0, -1)) + + @property + def name(self): + return self.impl.filename + + @property + def size(self): + return self.impl.file_size + +class ZipUnpacker(WrapperUnpacker): + def __init__(self, name): + super(ZipUnpacker, self).__init__() + self._impl = zipfile.ZipFile(name, 'r') + + def __iter__(self): + for entry in self._impl.infolist(): + yield ZipEntry(entry) + + def _is_entry(self, entry): + return isinstance(entry, ZipEntry) + + def _wrap_entry(self, entry): + return ZipEntry(self) diff --git a/game_data_packager/unpack/__main__.py b/game_data_packager/unpack/__main__.py new file mode 100644 index 0000000..3ef4640 --- /dev/null +++ b/game_data_packager/unpack/__main__.py @@ -0,0 +1,41 @@ +#!/usr/bin/python3 +# encoding=utf-8 +# +# Copyright © 2015 Simon McVittie <[email protected]> +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# You can find the GPL license text on a Debian system under +# /usr/share/common-licenses/GPL-2. + +import argparse +import tarfile +import zipfile + +from . import (TarUnpacker, ZipUnpacker) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--output', '-o', help='extract to OUTPUT', + default=None) + parser.add_argument('archive') + args = parser.parse_args() + + if zipfile.is_zipfile(args.archive): + unpacker = ZipUnpacker(args.archive) + elif tarfile.is_tarfile(args.archive): + unpacker = TarUnpacker(args.archive) + else: + raise SystemExit('Cannot work out how to unpack %r' % args.archive) + + if args.output: + unpacker.extractall(args.output) + else: + unpacker.printdir() -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-games/game-data-packager.git _______________________________________________ Pkg-games-commits mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-games-commits

