Sync into a quarantine subdirectory, using the rsync --link-dest option to create hardlinks to identical files in the previous snapshot of the repository. If hardlinks are not supported, then show a warning message and sync directly to the normal repository location.
If verification succeeds, then the quarantine subdirectory is synced to the normal repository location, and the quarantine subdirectory is deleted. If verification fails, then the quarantine directory is preserved for purposes of analysis. Even if verification happens to be disabled, the quarantine directory is still useful for making the repository update more atomic, so that it is less likely that normal repository location will be observed in a partially synced state. The new behavior may conflict with configurations that restrict the use of hardlinks, such as overlay filesystems. Therefore, users will have to set "sync-allow-hardlinks = no" in repos.conf if they have a configuration that prevents the use of hardlinks, but this should not be very common. Bug: https://bugs.gentoo.org/660410 --- [PATCH v2] makes it possible to disable the new behavior by setting "sync-allow-hardlinks = no" in repos.conf cnf/repos.conf | 1 + man/portage.5 | 8 +++ pym/portage/repository/config.py | 8 ++- pym/portage/sync/modules/rsync/rsync.py | 89 ++++++++++++++++++++++++++++++--- 4 files changed, 97 insertions(+), 9 deletions(-) diff --git a/cnf/repos.conf b/cnf/repos.conf index 352073cfd5..419f6d1182 100644 --- a/cnf/repos.conf +++ b/cnf/repos.conf @@ -6,6 +6,7 @@ location = /usr/portage sync-type = rsync sync-uri = rsync://rsync.gentoo.org/gentoo-portage auto-sync = yes +sync-allow-hardlinks = yes sync-rsync-verify-jobs = 1 sync-rsync-verify-metamanifest = yes sync-rsync-verify-max-age = 24 diff --git a/man/portage.5 b/man/portage.5 index 5adb07d821..acc80791be 100644 --- a/man/portage.5 +++ b/man/portage.5 @@ -973,6 +973,14 @@ files). Defaults to true. .br Valid values: true, false. .TP +.B sync\-allow\-hardlinks = yes|no +Allow sync plugins to use hardlinks in order to ensure that a repository +remains in a valid state if something goes wrong during the sync operation. +For example, if signature verification fails during a sync operation, +the previous state of the repository will be preserved. This option may +conflict with configurations that restrict the use of hardlinks, such as +overlay filesystems. +.TP .B sync\-cvs\-repo Specifies CVS repository. .TP diff --git a/pym/portage/repository/config.py b/pym/portage/repository/config.py index 1d897bb903..c7440369c2 100644 --- a/pym/portage/repository/config.py +++ b/pym/portage/repository/config.py @@ -86,6 +86,7 @@ class RepoConfig(object): 'sync_type', 'sync_umask', 'sync_uri', 'sync_user', 'thin_manifest', 'update_changelog', '_eapis_banned', '_eapis_deprecated', '_masters_orig', 'module_specific_options', 'manifest_required_hashes', + 'sync_allow_hardlinks', 'sync_openpgp_key_path', 'sync_openpgp_key_refresh_retry_count', 'sync_openpgp_key_refresh_retry_delay_max', @@ -188,6 +189,9 @@ class RepoConfig(object): self.strict_misc_digests = repo_opts.get( 'strict-misc-digests', 'true').lower() == 'true' + self.sync_allow_hardlinks = repo_opts.get( + 'sync-allow-hardlinks', 'true').lower() in ('true', 'yes') + self.sync_openpgp_key_path = repo_opts.get( 'sync-openpgp-key-path', None) @@ -534,6 +538,7 @@ class RepoConfigLoader(object): 'clone_depth', 'eclass_overrides', 'force', 'masters', 'priority', 'strict_misc_digests', 'sync_depth', 'sync_hooks_only_on_change', + 'sync_allow_hardlinks', 'sync_openpgp_key_path', 'sync_openpgp_key_refresh_retry_count', 'sync_openpgp_key_refresh_retry_delay_max', @@ -962,7 +967,8 @@ class RepoConfigLoader(object): def config_string(self): bool_keys = ("strict_misc_digests",) str_or_int_keys = ("auto_sync", "clone_depth", "format", "location", - "main_repo", "priority", "sync_depth", "sync_openpgp_key_path", + "main_repo", "priority", "sync_depth", + "sync_allow_hardlinks", "sync_openpgp_key_path", "sync_openpgp_key_refresh_retry_count", "sync_openpgp_key_refresh_retry_delay_max", "sync_openpgp_key_refresh_retry_delay_exp_base", diff --git a/pym/portage/sync/modules/rsync/rsync.py b/pym/portage/sync/modules/rsync/rsync.py index 382a1eaaef..59211d2bb8 100644 --- a/pym/portage/sync/modules/rsync/rsync.py +++ b/pym/portage/sync/modules/rsync/rsync.py @@ -11,6 +11,7 @@ import functools import io import re import random +import subprocess import tempfile import portage @@ -61,6 +62,56 @@ class RsyncSync(NewBase): def __init__(self): NewBase.__init__(self, "rsync", RSYNC_PACKAGE_ATOM) + def _select_download_dir(self): + ''' + Select and return the download directory. It's desirable to be able + to create shared hardlinks between the download directory to the + normal repository, and this is facilitated by making the download + directory be a subdirectory of the normal repository location + (ensuring that no mountpoints are crossed). Shared hardlinks are + created by using the rsync --link-dest option. + + Since the download is initially unverified, it is safest to save + it in a quarantine directory. The quarantine directory is also + useful for making the repository update more atomic, so that it + less likely that normal repository location will be observed in + a partially synced state. + + This method returns a quarantine directory if sync-allow-hardlinks + is enabled in repos.conf, and otherwise it returne the normal + repository location. + ''' + if self.repo.sync_allow_hardlinks: + return os.path.join(self.repo.location, '.tmp-unverified-download-quarantine') + else: + return self.repo.location + + def _commit_download(self, download_dir): + ''' + Commit changes from download_dir if it does not refer to the + normal repository location. + ''' + exitcode = 0 + if self.repo.location != download_dir: + rsynccommand = [self.bin_command] + self.rsync_opts + self.extra_rsync_opts + rsynccommand.append('--exclude=/%s' % os.path.basename(download_dir)) + rsynccommand.append('%s/' % download_dir.rstrip('/')) + rsynccommand.append('%s/' % self.repo.location) + exitcode = subprocess.call(rsynccommand) + if exitcode == 0: + exitcode = self._remove_download(download_dir) + + return exitcode + + def _remove_download(self, download_dir): + """ + Remove download_dir if it does not refer to the normal repository + location. + """ + exitcode = 0 + if self.repo.location != download_dir: + exitcode = subprocess.call(['rm', '-rf', download_dir]) + return exitcode def update(self): '''Internal update function which performs the transfer''' @@ -97,6 +148,9 @@ class RsyncSync(NewBase): self.extra_rsync_opts.extend(portage.util.shlex_split( self.repo.module_specific_options['sync-rsync-extra-opts'])) + download_dir = self._select_download_dir() + exitcode = 0 + # Process GLEP74 verification options. # Default verification to 'no'; it's enabled for ::gentoo # via default repos.conf though. @@ -219,8 +273,10 @@ class RsyncSync(NewBase): self.proto = "file" dosyncuri = syncuri[7:] unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync( - dosyncuri, timestamp, opts) + dosyncuri, timestamp, opts, download_dir) self._process_exitcode(exitcode, dosyncuri, out, 1) + if exitcode == 0 and not unchanged: + self._commit_download(download_dir) return (exitcode, updatecache_flg) retries=0 @@ -352,7 +408,7 @@ class RsyncSync(NewBase): dosyncuri = dosyncuri[6:].replace('/', ':/', 1) unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync( - dosyncuri, timestamp, opts) + dosyncuri, timestamp, opts, download_dir) if not unchanged: local_state_unchanged = False if is_synced: @@ -369,6 +425,12 @@ class RsyncSync(NewBase): break self._process_exitcode(exitcode, dosyncuri, out, maxretries) + if local_state_unchanged: + # The quarantine download_dir is not intended to exist + # in this case, so refer gemato to the normal repository + # location. + download_dir = self.repo.location + # if synced successfully, verify now if exitcode == 0 and self.verify_metamanifest: if gemato is None: @@ -380,7 +442,7 @@ class RsyncSync(NewBase): # we always verify the Manifest signature, in case # we had to deal with key revocation case m = gemato.recursiveloader.ManifestRecursiveLoader( - os.path.join(self.repo.location, 'Manifest'), + os.path.join(download_dir, 'Manifest'), verify_openpgp=True, openpgp_env=openpgp_env, max_jobs=self.verify_jobs) @@ -411,7 +473,7 @@ class RsyncSync(NewBase): # if nothing has changed, skip the actual Manifest # verification if not local_state_unchanged: - out.ebegin('Verifying %s' % (self.repo.location,)) + out.ebegin('Verifying %s' % (download_dir,)) m.assert_directory_verifies() out.eend(0) except GematoException as e: @@ -420,12 +482,16 @@ class RsyncSync(NewBase): level=logging.ERROR, noiselevel=-1) exitcode = 1 + if exitcode == 0 and not local_state_unchanged: + exitcode = self._commit_download(download_dir) + return (exitcode, updatecache_flg) finally: + if exitcode == 0: + self._remove_download(download_dir) if openpgp_env is not None: openpgp_env.close() - def _process_exitcode(self, exitcode, syncuri, out, maxretries): if (exitcode==0): pass @@ -561,7 +627,7 @@ class RsyncSync(NewBase): return rsync_opts - def _do_rsync(self, syncuri, timestamp, opts): + def _do_rsync(self, syncuri, timestamp, opts, download_dir): updatecache_flg = False is_synced = False if timestamp != 0 and "--quiet" not in opts: @@ -686,6 +752,12 @@ class RsyncSync(NewBase): elif (servertimestamp == 0) or (servertimestamp > timestamp): # actual sync command = rsynccommand[:] + + if self.repo.location != download_dir: + # Use shared hardlinks for files that are identical + # in the previous snapshot of the repository. + command.append('--link-dest=%s' % self.repo.location) + submodule_paths = self._get_submodule_paths() if submodule_paths: # The only way to select multiple directories to @@ -696,9 +768,10 @@ class RsyncSync(NewBase): # /./ is special syntax supported with the # rsync --relative option. command.append(syncuri + "/./" + path) - command.append(self.repo.location) else: - command.extend([syncuri + "/", self.repo.location]) + command.append(syncuri + "/") + + command.append(download_dir) exitcode = None try: -- 2.13.6