Sync into a quarantine subdirectory, using the rsync --link-dest option
to create hardlinks to identical files in the previous snapshot of the
repository. If hardlinks are not supported, then show a warning message
and sync directly to the normal repository location.

If verification succeeds, then the quarantine subdirectory is synced
to the normal repository location, and the quarantine subdirectory
is deleted. If verification fails, then the quarantine directory is
preserved for purposes of analysis.

Even if verification happens to be disabled, the quarantine directory
is still useful for making the repository update more atomic, so that
it is less likely that the normal repository location will be observed in
a partially synced state.

The new behavior may conflict with configurations that restrict the
use of hardlinks, such as overlay filesystems. Therefore, users will
have to set "sync-allow-hardlinks = no" in repos.conf if they have
a configuration that prevents the use of hardlinks, but this should
not be very common.

Bug: https://bugs.gentoo.org/660410
---
[PATCH v2] makes it possible to disable the new behavior by setting
"sync-allow-hardlinks = no" in repos.conf

 cnf/repos.conf                          |  1 +
 man/portage.5                           |  8 +++
 pym/portage/repository/config.py        |  8 ++-
 pym/portage/sync/modules/rsync/rsync.py | 89 ++++++++++++++++++++++++++++++---
 4 files changed, 97 insertions(+), 9 deletions(-)

diff --git a/cnf/repos.conf b/cnf/repos.conf
index 352073cfd5..419f6d1182 100644
--- a/cnf/repos.conf
+++ b/cnf/repos.conf
@@ -6,6 +6,7 @@ location = /usr/portage
 sync-type = rsync
 sync-uri = rsync://rsync.gentoo.org/gentoo-portage
 auto-sync = yes
+sync-allow-hardlinks = yes
 sync-rsync-verify-jobs = 1
 sync-rsync-verify-metamanifest = yes
 sync-rsync-verify-max-age = 24
diff --git a/man/portage.5 b/man/portage.5
index 5adb07d821..acc80791be 100644
--- a/man/portage.5
+++ b/man/portage.5
@@ -973,6 +973,14 @@ files). Defaults to true.
 .br
 Valid values: true, false.
 .TP
+.B sync\-allow\-hardlinks = yes|no
+Allow sync plugins to use hardlinks in order to ensure that a repository
+remains in a valid state if something goes wrong during the sync operation.
+For example, if signature verification fails during a sync operation,
+the previous state of the repository will be preserved. This option may
+conflict with configurations that restrict the use of hardlinks, such as
+overlay filesystems.
+.TP
 .B sync\-cvs\-repo
 Specifies CVS repository.
 .TP
diff --git a/pym/portage/repository/config.py b/pym/portage/repository/config.py
index 1d897bb903..c7440369c2 100644
--- a/pym/portage/repository/config.py
+++ b/pym/portage/repository/config.py
@@ -86,6 +86,7 @@ class RepoConfig(object):
                'sync_type', 'sync_umask', 'sync_uri', 'sync_user', 
'thin_manifest',
                'update_changelog', '_eapis_banned', '_eapis_deprecated',
                '_masters_orig', 'module_specific_options', 
'manifest_required_hashes',
+               'sync_allow_hardlinks',
                'sync_openpgp_key_path',
                'sync_openpgp_key_refresh_retry_count',
                'sync_openpgp_key_refresh_retry_delay_max',
@@ -188,6 +189,9 @@ class RepoConfig(object):
                self.strict_misc_digests = repo_opts.get(
                        'strict-misc-digests', 'true').lower() == 'true'
 
+               self.sync_allow_hardlinks = repo_opts.get(
+                       'sync-allow-hardlinks', 'true').lower() in ('true', 
'yes')
+
                self.sync_openpgp_key_path = repo_opts.get(
                        'sync-openpgp-key-path', None)
 
@@ -534,6 +538,7 @@ class RepoConfigLoader(object):
                                                        'clone_depth', 
'eclass_overrides',
                                                        'force', 'masters', 
'priority', 'strict_misc_digests',
                                                        'sync_depth', 
'sync_hooks_only_on_change',
+                                                       'sync_allow_hardlinks',
                                                        'sync_openpgp_key_path',
                                                        
'sync_openpgp_key_refresh_retry_count',
                                                        
'sync_openpgp_key_refresh_retry_delay_max',
@@ -962,7 +967,8 @@ class RepoConfigLoader(object):
        def config_string(self):
                bool_keys = ("strict_misc_digests",)
                str_or_int_keys = ("auto_sync", "clone_depth", "format", 
"location",
-                       "main_repo", "priority", "sync_depth", 
"sync_openpgp_key_path",
+                       "main_repo", "priority", "sync_depth",
+                       "sync_allow_hardlinks", "sync_openpgp_key_path",
                        "sync_openpgp_key_refresh_retry_count",
                        "sync_openpgp_key_refresh_retry_delay_max",
                        "sync_openpgp_key_refresh_retry_delay_exp_base",
diff --git a/pym/portage/sync/modules/rsync/rsync.py 
b/pym/portage/sync/modules/rsync/rsync.py
index 382a1eaaef..59211d2bb8 100644
--- a/pym/portage/sync/modules/rsync/rsync.py
+++ b/pym/portage/sync/modules/rsync/rsync.py
@@ -11,6 +11,7 @@ import functools
 import io
 import re
 import random
+import subprocess
 import tempfile
 
 import portage
@@ -61,6 +62,56 @@ class RsyncSync(NewBase):
        def __init__(self):
                NewBase.__init__(self, "rsync", RSYNC_PACKAGE_ATOM)
 
+       def _select_download_dir(self):
+               '''
+               Select and return the download directory. It's desirable to be 
able
+               to create shared hardlinks between the download directory and the
+               normal repository, and this is facilitated by making the 
download
+               directory be a subdirectory of the normal repository location
+               (ensuring that no mountpoints are crossed). Shared hardlinks are
+               created by using the rsync --link-dest option.
+
+               Since the download is initially unverified, it is safest to save
+               it in a quarantine directory. The quarantine directory is also
+               useful for making the repository update more atomic, so that it
+               is less likely that the normal repository location will be observed in
+               a partially synced state.
+
+               This method returns a quarantine directory if 
sync-allow-hardlinks
+               is enabled in repos.conf, and otherwise it returns the normal
+               repository location.
+               '''
+               if self.repo.sync_allow_hardlinks:
+                       return os.path.join(self.repo.location, 
'.tmp-unverified-download-quarantine')
+               else:
+                       return self.repo.location
+
+       def _commit_download(self, download_dir):
+               '''
+               Commit changes from download_dir if it does not refer to the
+               normal repository location.
+               '''
+               exitcode = 0
+               if self.repo.location != download_dir:
+                       rsynccommand = [self.bin_command] + self.rsync_opts + 
self.extra_rsync_opts
+                       rsynccommand.append('--exclude=/%s' % 
os.path.basename(download_dir))
+                       rsynccommand.append('%s/' % download_dir.rstrip('/'))
+                       rsynccommand.append('%s/' % self.repo.location)
+                       exitcode = subprocess.call(rsynccommand)
+                       if exitcode == 0:
+                               exitcode = self._remove_download(download_dir)
+
+               return exitcode
+
+       def _remove_download(self, download_dir):
+               """
+               Remove download_dir if it does not refer to the normal 
repository
+               location.
+               """
+               exitcode = 0
+               if self.repo.location != download_dir:
+                       exitcode = subprocess.call(['rm', '-rf', download_dir])
+               return exitcode
 
        def update(self):
                '''Internal update function which performs the transfer'''
@@ -97,6 +148,9 @@ class RsyncSync(NewBase):
                        self.extra_rsync_opts.extend(portage.util.shlex_split(
                                
self.repo.module_specific_options['sync-rsync-extra-opts']))
 
+               download_dir = self._select_download_dir()
+               exitcode = 0
+
                # Process GLEP74 verification options.
                # Default verification to 'no'; it's enabled for ::gentoo
                # via default repos.conf though.
@@ -219,8 +273,10 @@ class RsyncSync(NewBase):
                                self.proto = "file"
                                dosyncuri = syncuri[7:]
                                unchanged, is_synced, exitcode, updatecache_flg 
= self._do_rsync(
-                                       dosyncuri, timestamp, opts)
+                                       dosyncuri, timestamp, opts, 
download_dir)
                                self._process_exitcode(exitcode, dosyncuri, 
out, 1)
+                               if exitcode == 0 and not unchanged:
+                                       self._commit_download(download_dir)
                                return (exitcode, updatecache_flg)
 
                        retries=0
@@ -352,7 +408,7 @@ class RsyncSync(NewBase):
                                        dosyncuri = dosyncuri[6:].replace('/', 
':/', 1)
 
                                unchanged, is_synced, exitcode, updatecache_flg 
= self._do_rsync(
-                                       dosyncuri, timestamp, opts)
+                                       dosyncuri, timestamp, opts, 
download_dir)
                                if not unchanged:
                                        local_state_unchanged = False
                                if is_synced:
@@ -369,6 +425,12 @@ class RsyncSync(NewBase):
                                        break
                        self._process_exitcode(exitcode, dosyncuri, out, 
maxretries)
 
+                       if local_state_unchanged:
+                               # The quarantine download_dir is not intended 
to exist
+                               # in this case, so refer gemato to the normal 
repository
+                               # location.
+                               download_dir = self.repo.location
+
                        # if synced successfully, verify now
                        if exitcode == 0 and self.verify_metamanifest:
                                if gemato is None:
@@ -380,7 +442,7 @@ class RsyncSync(NewBase):
                                                # we always verify the Manifest 
signature, in case
                                                # we had to deal with key 
revocation case
                                                m = 
gemato.recursiveloader.ManifestRecursiveLoader(
-                                                               
os.path.join(self.repo.location, 'Manifest'),
+                                                               
os.path.join(download_dir, 'Manifest'),
                                                                
verify_openpgp=True,
                                                                
openpgp_env=openpgp_env,
                                                                
max_jobs=self.verify_jobs)
@@ -411,7 +473,7 @@ class RsyncSync(NewBase):
                                                # if nothing has changed, skip 
the actual Manifest
                                                # verification
                                                if not local_state_unchanged:
-                                                       out.ebegin('Verifying 
%s' % (self.repo.location,))
+                                                       out.ebegin('Verifying 
%s' % (download_dir,))
                                                        
m.assert_directory_verifies()
                                                        out.eend(0)
                                        except GematoException as e:
@@ -420,12 +482,16 @@ class RsyncSync(NewBase):
                                                                
level=logging.ERROR, noiselevel=-1)
                                                exitcode = 1
 
+                       if exitcode == 0 and not local_state_unchanged:
+                               exitcode = self._commit_download(download_dir)
+
                        return (exitcode, updatecache_flg)
                finally:
+                       if exitcode == 0:
+                               self._remove_download(download_dir)
                        if openpgp_env is not None:
                                openpgp_env.close()
 
-
        def _process_exitcode(self, exitcode, syncuri, out, maxretries):
                if (exitcode==0):
                        pass
@@ -561,7 +627,7 @@ class RsyncSync(NewBase):
                return rsync_opts
 
 
-       def _do_rsync(self, syncuri, timestamp, opts):
+       def _do_rsync(self, syncuri, timestamp, opts, download_dir):
                updatecache_flg = False
                is_synced = False
                if timestamp != 0 and "--quiet" not in opts:
@@ -686,6 +752,12 @@ class RsyncSync(NewBase):
                        elif (servertimestamp == 0) or (servertimestamp > 
timestamp):
                                # actual sync
                                command = rsynccommand[:]
+
+                               if self.repo.location != download_dir:
+                                       # Use shared hardlinks for files that 
are identical
+                                       # in the previous snapshot of the 
repository.
+                                       command.append('--link-dest=%s' % 
self.repo.location)
+
                                submodule_paths = self._get_submodule_paths()
                                if submodule_paths:
                                        # The only way to select multiple 
directories to
@@ -696,9 +768,10 @@ class RsyncSync(NewBase):
                                                # /./ is special syntax 
supported with the
                                                # rsync --relative option.
                                                command.append(syncuri + "/./" 
+ path)
-                                       command.append(self.repo.location)
                                else:
-                                       command.extend([syncuri + "/", 
self.repo.location])
+                                       command.append(syncuri + "/")
+
+                               command.append(download_dir)
 
                                exitcode = None
                                try:
-- 
2.13.6


Reply via email to