Hi, I'm attaching a patch that provides a basic implementation of Distfile Patching Support for Portage. This is part of my GSoC project [1].
It relies on the tools provided by app-portage/distpatch, committed to the tree today, and is able to take an old tarball plus an XZ-compressed binary delta and reconstruct the needed tarball, if a delta is available. You may find a few sample deltas and a delta.db on my public page at soc.dev.gentoo.org [2]. More deltas will be available soon. These are just for testing.

Basic steps to test (be careful with the commands! :)

# echo 'DELTAS_ROOT_URL="http://soc.dev.gentoo.org/~rafaelmartins/"' >> /etc/make.conf
# echo 'FEATURES="${FEATURES} distpatch"' >> /etc/make.conf
# wget -O $(portageq distdir)/delta.db http://soc.dev.gentoo.org/~rafaelmartins/delta.db
# emerge --fetchonly =grep-2.8
# rm $(portageq distdir)/grep-2.9.tar.xz
# emerge -av1 =grep-2.9

You'll see that the reported fetch size is much smaller than the full tarball (80kB vs. >1MB). If you hit return, a delta will be fetched and saved to $DISTDIR/deltas, the tarball will be reconstructed, the checksums will be verified and the package will be installed. The reconstructed tarball will be saved to $DISTDIR if its checksums match those of the original tarball, or to $DISTDIR/delta-reconstructed if the checksums of the compressed files do not match but the checksums of the uncompressed files do. This is handled in a secure way by the distpatch tools and the delta.db file.

Currently the delta.db file is fetched manually and placed in $DISTDIR because we haven't decided yet how it will be shipped to users. Probably through rsync, but we can't say before we have a "real" deployment of the delta generators. This will require a small patch soon.

This patch isn't intrusive, and shouldn't affect any users with FEATURES="-distpatch". All the variable names can be changed, if wanted. I'm not very good at choosing names :) That's it.
[1] - http://www.gentoo.org/proj/en/infrastructure/distpatch/ [2] - http://soc.dev.gentoo.org/~rafaelmartins/ Regards, -- Rafael Goncalves Martins Gentoo Linux developer http://rafaelmartins.eng.br/
From 32e2626e849e221bace8c4150c82efb4170f6807 Mon Sep 17 00:00:00 2001 From: "Rafael G. Martins" <rafaelmart...@gentoo.org> Date: Thu, 11 Aug 2011 04:25:57 -0300 Subject: [PATCH] distpatch: basic implementation of Distfile Patching Support. This patch allows Portage to call the tools from app-portage/distpatch to reconstruct distfiles from binary deltas and validate them. --- pym/portage/const.py | 6 +- pym/portage/dbapi/porttree.py | 24 ++++++- pym/portage/package/ebuild/doebuild.py | 3 + pym/portage/package/ebuild/fetch.py | 116 +++++++++++++++++++++++++++----- 4 files changed, 128 insertions(+), 21 deletions(-) diff --git a/pym/portage/const.py b/pym/portage/const.py index ecaa8f1..6b211f8 100644 --- a/pym/portage/const.py +++ b/pym/portage/const.py @@ -71,6 +71,8 @@ PRELINK_BINARY = "/usr/sbin/prelink" INVALID_ENV_FILE = "/etc/spork/is/not/valid/profile.env" REPO_NAME_FILE = "repo_name" REPO_NAME_LOC = "profiles" + "/" + REPO_NAME_FILE +DELTADB_FILE = "delta.db" +DELTAS_DIR = "deltas" PORTAGE_PACKAGE_ATOM = "sys-apps/portage" LIBC_PACKAGE_ATOM = "virtual/libc" @@ -89,8 +91,8 @@ SUPPORTED_FEATURES = frozenset([ "allow-missing-manifests", "assume-digests", "binpkg-logs", "buildpkg", "buildsyspkg", "candy", "ccache", "chflags", "collision-protect", "compress-build-logs", - "digest", "distcc", "distcc-pump", "distlocks", "ebuild-locks", "fakeroot", - "fail-clean", "fixpackages", "force-mirror", "getbinpkg", + "digest", "distcc", "distcc-pump", "distlocks", "distpatch", "ebuild-locks", + "fakeroot", "fail-clean", "fixpackages", "force-mirror", "getbinpkg", "installsources", "keeptemp", "keepwork", "fixlafiles", "lmirror", "metadata-transfer", "mirror", "multilib-strict", "news", "noauto", "noclean", "nodoc", "noinfo", "noman", diff --git a/pym/portage/dbapi/porttree.py b/pym/portage/dbapi/porttree.py index bf8ecd9..e19adbf 100644 --- a/pym/portage/dbapi/porttree.py +++ b/pym/portage/dbapi/porttree.py @@ -15,11 +15,13 @@ portage.proxy.lazyimport.lazyimport(globals(), 
'portage.util:ensure_dirs,shlex_split,writemsg,writemsg_level', 'portage.util.listdir:listdir', 'portage.versions:best,catpkgsplit,_pkgsplit@pkgsplit,ver_regexp', + 'subprocess', ) from portage.cache import metadata_overlay, volatile from portage.cache.cache_errors import CacheError from portage.cache.mappings import Mapping +from portage.const import DELTADB_FILE, DELTAS_DIR from portage.dbapi import dbapi from portage.exception import PortageException, \ FileNotFound, InvalidAtom, InvalidDependString, InvalidPackageName @@ -588,7 +590,27 @@ class portdbapi(dbapi): # into account? check checksums? for myfile in myfiles: try: - fetch_size = int(checksums[myfile]["size"]) + if "distpatch" in self.settings.features: + try: + remaining_size = subprocess.check_output([ + "distpatchq", + "delta_fetch_size", + os.path.join(self.settings["DISTDIR"], DELTADB_FILE), + myfile, + self.settings["DISTDIR"], + os.path.join(self.settings["DISTDIR"], DELTAS_DIR), + ], env=self.settings.environ()) + except subprocess.CalledProcessError, e: + fetch_size = int(checksums[myfile]["size"]) + else: + try: + filesdict[myfile] = int(remaining_size) + except ValueError: + fetch_size = int(checksums[myfile]["size"]) + else: + continue + else: + fetch_size = int(checksums[myfile]["size"]) except (KeyError, ValueError): if debug: writemsg(_("[bad digest]: missing %(file)s for %(pkg)s\n") % {"file":myfile, "pkg":mypkg}) diff --git a/pym/portage/package/ebuild/doebuild.py b/pym/portage/package/ebuild/doebuild.py index a710e09..19963bb 100644 --- a/pym/portage/package/ebuild/doebuild.py +++ b/pym/portage/package/ebuild/doebuild.py @@ -1037,6 +1037,9 @@ def _prepare_fake_distdir(settings, alist): for x in alist: symlink_path = os.path.join(edpath, x) target = os.path.join(orig_distdir, x) + reconstructed_target = os.path.join(orig_distdir, "delta-reconstructed", x) + if "distpatch" in settings.features and os.path.exists(reconstructed_target): + target = reconstructed_target try: link_target = 
os.readlink(symlink_path) except OSError: diff --git a/pym/portage/package/ebuild/fetch.py b/pym/portage/package/ebuild/fetch.py index 5cbbf87..c86317c 100644 --- a/pym/portage/package/ebuild/fetch.py +++ b/pym/portage/package/ebuild/fetch.py @@ -22,13 +22,14 @@ portage.proxy.lazyimport.lazyimport(globals(), 'portage.package.ebuild.doebuild:doebuild_environment,' + \ '_doebuild_spawn', 'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs', + 'subprocess' ) from portage import OrderedDict, os, selinux, _encodings, \ _shell_quote, _unicode_encode from portage.checksum import hashfunc_map, perform_md5, verify_all from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \ - GLOBAL_CONFIG_PATH + GLOBAL_CONFIG_PATH, DELTADB_FILE, DELTAS_DIR from portage.data import portage_gid, portage_uid, secpass, userpriv_groups from portage.exception import FileNotFound, OperationNotPermitted, \ PortageException, TryAgain @@ -96,6 +97,33 @@ def _spawn_fetch(settings, args, **kwargs): return rval +def _spawn_distpatch(settings, myfile, **kwargs): + phase_backup = settings.get('EBUILD_PHASE') + settings['EBUILD_PHASE'] = 'fetch' + + # mangle distpatcher args + args = [ + 'distpatcher', + '--db', os.path.join(settings['DISTDIR'], DELTADB_FILE), + '--output', settings['DISTDIR'], + '--input', os.path.join(settings['DISTDIR'], DELTAS_DIR), + '--distfile' + ] + if not settings.get('PORTAGE_QUIET', False): + args.append('--verbose') + args.append(myfile) + + # run distpatcher + try: + rval = spawn(args, env=settings.environ(), **kwargs) + finally: + if phase_backup is None: + settings.pop('EBUILD_PHASE', None) + else: + settings['EBUILD_PHASE'] = phase_backup + + return rval + _userpriv_test_write_file_cache = {} _userpriv_test_write_cmd_script = ">> %(file_path)s 2>/dev/null ; rval=$? 
; " + \ "rm -f %(file_path)s ; exit $rval" @@ -572,6 +600,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0, pruned_digests["size"] = size myfile_path = os.path.join(mysettings["DISTDIR"], myfile) + myfile_reconstructed_path = os.path.join(mysettings["DISTDIR"], "delta-reconstructed", myfile) has_space = True has_space_superuser = True file_lock = None @@ -739,13 +768,30 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0, raise del e + have_myfile = False + have_distpatch = False try: mystat = os.stat(myfile_path) except OSError as e: if e.errno not in (errno.ENOENT, errno.ESTALE): raise + else: + if 'distpatch' in mysettings.features: + try: + mystat = os.stat(myfile_reconstructed_path) + except OSError as _e: + if e.errno not in (errno.ENOENT, errno.ESTALE): + raise + del _e + else: + myfile_path = myfile_reconstructed_path + have_myfile = True + have_distpatch = True del e else: + have_myfile = True + + if have_myfile: try: apply_secpass_permissions( myfile_path, gid=portage_gid, mode=0o664, mask=0o2, @@ -784,23 +830,38 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0, verified_ok, reason = verify_all( myfile_path, mydigests[myfile]) if not verified_ok: - writemsg(_("!!! Previously fetched" - " file: '%s'\n") % myfile, noiselevel=-1) - writemsg(_("!!! Reason: %s\n") % reason[0], - noiselevel=-1) - writemsg(_("!!! Got: %s\n" - "!!! Expected: %s\n") % \ - (reason[1], reason[2]), noiselevel=-1) - if reason[0] == _("Insufficient data for checksum verification"): - return 0 - if distdir_writable: - temp_filename = \ - _checksum_failure_temp_file( - mysettings["DISTDIR"], myfile) - writemsg_stdout(_("Refetching... " - "File renamed to '%s'\n\n") % \ - temp_filename, noiselevel=-1) - else: + if not have_distpatch: + writemsg(_("!!! Previously fetched" + " file: '%s'\n") % myfile, noiselevel=-1) + writemsg(_("!!! Reason: %s\n") % reason[0], + noiselevel=-1) + writemsg(_("!!! Got: %s\n" + "!!! 
Expected: %s\n") % \ + (reason[1], reason[2]), noiselevel=-1) + if reason[0] == _("Insufficient data for checksum verification"): + return 0 + if distdir_writable: + temp_filename = \ + _checksum_failure_temp_file( + mysettings["DISTDIR"], myfile) + writemsg_stdout(_("Refetching... " + "File renamed to '%s'\n\n") % \ + temp_filename, noiselevel=-1) + else: + eout = EOutput() + eout.quiet = mysettings.get("PORTAGE_QUIET", None) == "1" + eout.ebegin("%s reconstructed file, verifying checksums using distpatch" % myfile) + myret = subprocess.call([ + "distpatchq", + "delta_verify_checksums", + os.path.join(mysettings["DISTDIR"], DELTADB_FILE), + myfile, + mysettings["DISTDIR"], + ], env=mysettings.environ()) + eout.eend(myret) + if myret == 0: + continue + if verified_ok: eout = EOutput() eout.quiet = \ mysettings.get("PORTAGE_QUIET", None) == "1" @@ -937,6 +998,25 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0, command_var = resumecommand_var else: #normal mode: + if "distpatch" in mysettings.features: + myret = _spawn_distpatch(mysettings, myfile) + if myret == 0: + reconstructed_dir = os.path.join(mysettings["DISTDIR"], "delta-reconstructed") + if os.path.isdir(reconstructed_dir) and myfile in os.listdir(reconstructed_dir): + eout = EOutput() + eout.quiet = mysettings.get("PORTAGE_QUIET", None) == "1" + eout.ebegin("%s reconstructed file, verifying checksums using distpatch" % myfile) + myret = subprocess.call([ + "distpatchq", + "delta_verify_checksums", + os.path.join(mysettings["DISTDIR"], DELTADB_FILE), + myfile, + mysettings["DISTDIR"], + ], env=mysettings.environ()) + eout.eend(myret) + if myret == 0: + fetched = 2 + break locfetch=fetchcommand command_var = fetchcommand_var writemsg_stdout(_(">>> Downloading '%s'\n") % \ -- 1.7.6