Source: diffoscope Version: ada1a1dcdc19217fb611e0a1e57bc3744399aefa Severity: wishlist Tags: patch
It would be useful for diffoscope to output differences in omni.ja files the same way it does for other ZIP files, instead of ending up with a diff of a hexdump. The attached patch implements minimal support for this. However, it doesn't look at the difference in the `preload` value. -- System Information: Debian Release: stretch/sid APT prefers unstable APT policy: (500, 'unstable'), (1, 'experimental') Architecture: amd64 (x86_64) Foreign Architectures: i386 Kernel: Linux 4.2.0-1-amd64 (SMP w/4 CPU cores) Locale: LANG=ja_JP.UTF-8, LC_CTYPE=ja_JP.UTF-8 (charmap=UTF-8) Shell: /bin/sh linked to /bin/dash Init: systemd (via /run/systemd/system)
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index b4615c9..b5dd320 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -72,7 +72,7 @@ from diffoscope.comparators.symlink import Symlink
 from diffoscope.comparators.text import TextFile
 from diffoscope.comparators.tar import TarFile
 from diffoscope.comparators.xz import XzFile
-from diffoscope.comparators.zip import ZipFile
+from diffoscope.comparators.zip import ZipFile, MozillaZipFile
 
 
 def bail_if_non_existing(*paths):
@@ -154,6 +154,7 @@ FILE_CLASSES = (
     TarFile,
     XzFile,
     ZipFile,
+    MozillaZipFile,
     ImageFile,
     )
 
diff --git a/diffoscope/comparators/zip.py b/diffoscope/comparators/zip.py
index ecdc77b..42c9a9f 100644
--- a/diffoscope/comparators/zip.py
+++ b/diffoscope/comparators/zip.py
@@ -111,3 +111,65 @@ class ZipFile(File):
         zipinfo_difference = Difference.from_command(Zipinfo, self.path, other.path) or \
                              Difference.from_command(ZipinfoVerbose, self.path, other.path)
         return [zipinfo_difference]
+
+
+class MozillaZipCommandMixin(object):
+    """Command mixin whose wait() always reports success (returns 0)."""
+
+    def wait(self):
+        # zipinfo emits an error when reading Mozilla-optimized ZIPs,
+        # which is fine to ignore.
+        super(Zipinfo, self).wait()
+        return 0
+
+
+class MozillaZipinfo(MozillaZipCommandMixin, Zipinfo): pass
+
+
+class MozillaZipinfoVerbose(MozillaZipCommandMixin, ZipinfoVerbose): pass
+
+
+class MozillaZipContainer(ZipContainer):
+    """ZIP container for Mozilla-optimized ZIPs such as omni.ja."""
+
+    def open_archive(self):
+        # This is gross: Monkeypatch zipfile._EndRecData to work with
+        # Mozilla-optimized ZIPs
+        _orig_EndRecData = zipfile._EndRecData
+        def _EndRecData(fh):
+            endrec = _orig_EndRecData(fh)
+            if endrec:
+                endrec[zipfile._ECD_LOCATION] = (endrec[zipfile._ECD_OFFSET] +
+                                                 endrec[zipfile._ECD_SIZE])
+            return endrec
+        zipfile._EndRecData = _EndRecData
+        try:
+            return super(MozillaZipContainer, self).open_archive()
+        finally:
+            # Always undo the monkeypatch, even if opening the archive
+            # fails, so other users of zipfile are not affected.
+            zipfile._EndRecData = _orig_EndRecData
+
+
+class MozillaZipFile(File):
+    """File comparator for Mozilla-optimized ZIPs (e.g. omni.ja)."""
+
+    CONTAINER_CLASS = MozillaZipContainer
+
+    @staticmethod
+    def recognizes(file):
+        # Mozilla-optimized ZIPs start with a 32-bit little endian integer
+        # indicating the amount of data to preload, followed by the ZIP
+        # central directory (with a PK\x01\x02 signature)
+        with open(file.path, 'rb') as f:
+            preload = f.read(4)
+            if len(preload) == 4:
+                signature = f.read(4)
+                return signature == b'PK\x01\x02'
+        # Too short to hold the preload size followed by a signature.
+        return False
+
+    def compare_details(self, other, source=None):
+        zipinfo_difference = Difference.from_command(MozillaZipinfo, self.path, other.path) or \
+                             Difference.from_command(MozillaZipinfoVerbose, self.path, other.path)
+        return [zipinfo_difference]