commit:     71d9ce40be5bbf533a6d1b59c5a460621c3c91c4
Author:     Zac Medico <zmedico <AT> gentoo <DOT> org>
AuthorDate: Thu Mar 14 04:09:21 2024 +0000
Commit:     Zac Medico <zmedico <AT> gentoo <DOT> org>
CommitDate: Sat May 25 22:08:15 2024 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=71d9ce40

Add get_repo_revision_history function and repo_revisions file

The history of synced revisions is provided by a new
get_repo_revision_history function and corresponding
/var/lib/portage/repo_revisions file, with history
limit currently capped at 25 revisions. If a change
is detected and the current process has permission
to update the repo_revisions file, then the file will
be updated with any newly detected revisions.
For volatile repos the revisions may be unordered,
which makes them unusable for the purposes of the
revision history, so the revisions of volatile repos
are not tracked. This function detects revisions
which are not yet visible to the current process due
to the sync-rcu option.

The emaint revisions --purgerepos and --purgeallrepos
options allow revisions for some or all repos to be
easily purged from the history. For example, the
emerge-webrsync script uses this emaint command to
purge the revision history of the gentoo repo when
the emerge-webrsync --revert option is used to roll
back to a previous snapshot:

    emaint revisions --purgerepos="${repo_name}"

Bug: https://bugs.gentoo.org/924772
Signed-off-by: Zac Medico <zmedico <AT> gentoo.org>

 bin/emerge-webrsync                               |   3 +-
 lib/portage/const.py                              |   1 +
 lib/portage/emaint/modules/meson.build            |   1 +
 lib/portage/emaint/modules/revisions/__init__.py  |  36 ++++++
 lib/portage/emaint/modules/revisions/meson.build  |   8 ++
 lib/portage/emaint/modules/revisions/revisions.py |  95 ++++++++++++++++
 lib/portage/sync/controller.py                    |   8 +-
 lib/portage/sync/meson.build                      |   1 +
 lib/portage/sync/revision_history.py              | 133 ++++++++++++++++++++++
 lib/portage/tests/sync/test_sync_local.py         |  75 +++++++++++-
 man/emaint.1                                      |  18 ++-
 man/portage.5                                     |  15 +++
 12 files changed, 387 insertions(+), 7 deletions(-)

diff --git a/bin/emerge-webrsync b/bin/emerge-webrsync
index 99da05543a..caa4986da2 100755
--- a/bin/emerge-webrsync
+++ b/bin/emerge-webrsync
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Copyright 1999-2023 Gentoo Authors
+# Copyright 1999-2024 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 # Author: Karl Trygve Kalleberg <kar...@gentoo.org>
 # Rewritten from the old, Perl-based emerge-webrsync script
@@ -732,6 +732,7 @@ main() {
        [[ ${do_debug} -eq 1 ]] && set -x
 
        if [[ -n ${revert_date} ]] ; then
+               emaint revisions --purgerepos="${repo_name}"
                do_snapshot 1 "${revert_date}"
        else
                do_latest_snapshot

diff --git a/lib/portage/const.py b/lib/portage/const.py
index 2154213b7b..c9a71009a7 100644
--- a/lib/portage/const.py
+++ b/lib/portage/const.py
@@ -51,6 +51,7 @@ PRIVATE_PATH = "var/lib/portage"
 WORLD_FILE = f"{PRIVATE_PATH}/world"
 WORLD_SETS_FILE = f"{PRIVATE_PATH}/world_sets"
 CONFIG_MEMORY_FILE = f"{PRIVATE_PATH}/config"
+REPO_REVISIONS = f"{PRIVATE_PATH}/repo_revisions"
 NEWS_LIB_PATH = "var/lib/gentoo"
 
 # these variables get EPREFIX prepended automagically when they are

diff --git a/lib/portage/emaint/modules/meson.build b/lib/portage/emaint/modules/meson.build
index 48f4f77d83..33b396be94 100644
--- a/lib/portage/emaint/modules/meson.build
+++ b/lib/portage/emaint/modules/meson.build
@@ -12,5 +12,6 @@ subdir('logs')
 subdir('merges')
 subdir('move')
 subdir('resume')
+subdir('revisions')
 subdir('sync')
 subdir('world')

diff --git a/lib/portage/emaint/modules/revisions/__init__.py b/lib/portage/emaint/modules/revisions/__init__.py
new file mode 100644
index 0000000000..c51cbb4bf3
--- /dev/null
+++ b/lib/portage/emaint/modules/revisions/__init__.py
@@ -0,0 +1,36 @@
+# Copyright 2024 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+doc = """Purge repo_revisions history file."""
+__doc__ = doc
+
+
+module_spec = {
+    "name": "revisions",
+    "description": doc,
+    "provides": {
+        "purgerevisions": {
+            "name": "revisions",
+            "sourcefile": "revisions",
+            "class": "PurgeRevisions",
+            "description": "Purge repo_revisions history",
+            "functions": ["purgeallrepos", "purgerepos"],
+            "func_desc": {
+                "repo": {
+                    "long": "--purgerepos",
+                    "help": "(revisions module only): --purgerepos  Purge 
revisions for the specified repo(s)",
+                    "status": "Purging %s",
+                    "action": "store",
+                    "func": "purgerepos",
+                },
+                "allrepos": {
+                    "long": "--purgeallrepos",
+                    "help": "(revisions module only): --purgeallrepos  Purge 
revisions for all repos",
+                    "status": "Purging %s",
+                    "action": "store_true",
+                    "func": "purgeallrepos",
+                },
+            },
+        },
+    },
+}

diff --git a/lib/portage/emaint/modules/revisions/meson.build b/lib/portage/emaint/modules/revisions/meson.build
new file mode 100644
index 0000000000..9d4c61ec4d
--- /dev/null
+++ b/lib/portage/emaint/modules/revisions/meson.build
@@ -0,0 +1,8 @@
+py.install_sources(
+    [
+        'revisions.py',
+        '__init__.py',
+    ],
+    subdir : 'portage/emaint/modules/revisions',
+    pure : not native_extensions
+)

diff --git a/lib/portage/emaint/modules/revisions/revisions.py b/lib/portage/emaint/modules/revisions/revisions.py
new file mode 100644
index 0000000000..7078b2a8b4
--- /dev/null
+++ b/lib/portage/emaint/modules/revisions/revisions.py
@@ -0,0 +1,95 @@
+# Copyright 2024 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+import json
+import os
+
+import portage
+
+
+class PurgeRevisions:
+    short_desc = "Purge repo_revisions history file."
+
+    @staticmethod
+    def name():
+        return "revisions"
+
+    def __init__(self, settings=None):
+        """Class init function
+
+        @param settings: optional portage.config instance to get EROOT from.
+        """
+        self._settings = settings
+
+    @property
+    def settings(self):
+        return self._settings or portage.settings
+
+    def purgeallrepos(self, **kwargs):
+        """Purge revisions for all repos"""
+        repo_revisions_file = os.path.join(
+            self.settings["EROOT"], portage.const.REPO_REVISIONS
+        )
+        msgs = []
+        try:
+            os.stat(repo_revisions_file)
+        except FileNotFoundError:
+            pass
+        except OSError as e:
+            msgs.append(f"{repo_revisions_file}: {e}")
+        else:
+            repo_revisions_lock = None
+            try:
+                repo_revisions_lock = 
portage.locks.lockfile(repo_revisions_file)
+                os.unlink(repo_revisions_file)
+            except FileNotFoundError:
+                pass
+            except OSError as e:
+                msgs.append(f"{repo_revisions_file}: {e}")
+            finally:
+                if repo_revisions_lock is not None:
+                    portage.locks.unlockfile(repo_revisions_lock)
+        return (not msgs, msgs)
+
+    def purgerepos(self, **kwargs):
+        """Purge revisions for specified repos"""
+        options = kwargs.get("options", None)
+        if options:
+            repo_names = options.get("purgerepos", "")
+        if isinstance(repo_names, str):
+            repo_names = repo_names.split()
+
+        repo_revisions_file = os.path.join(
+            self.settings["EROOT"], portage.const.REPO_REVISIONS
+        )
+        msgs = []
+        try:
+            os.stat(repo_revisions_file)
+        except FileNotFoundError:
+            pass
+        except OSError as e:
+            msgs.append(f"{repo_revisions_file}: {e}")
+        else:
+            repo_revisions_lock = None
+            try:
+                repo_revisions_lock = 
portage.locks.lockfile(repo_revisions_file)
+                with open(repo_revisions_file, encoding="utf8") as f:
+                    if os.fstat(f.fileno()).st_size:
+                        previous_revisions = json.load(f)
+                repo_revisions = (
+                    {} if previous_revisions is None else 
previous_revisions.copy()
+                )
+                for repo_name in repo_names:
+                    repo_revisions.pop(repo_name, None)
+                if not repo_revisions:
+                    os.unlink(repo_revisions_file)
+                elif repo_revisions != previous_revisions:
+                    f = portage.util.atomic_ofstream(repo_revisions_file)
+                    json.dump(repo_revisions, f, ensure_ascii=False, 
sort_keys=True)
+                    f.close()
+            except OSError as e:
+                msgs.append(f"{repo_revisions_file}: {e}")
+            finally:
+                if repo_revisions_lock is not None:
+                    portage.locks.unlockfile(repo_revisions_lock)
+        return (not msgs, msgs)

diff --git a/lib/portage/sync/controller.py b/lib/portage/sync/controller.py
index da593e1a85..1d55c8a5dd 100644
--- a/lib/portage/sync/controller.py
+++ b/lib/portage/sync/controller.py
@@ -1,4 +1,4 @@
-# Copyright 2014-2020 Gentoo Authors
+# Copyright 2014-2024 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
 import sys
@@ -8,6 +8,11 @@ import pwd
 import warnings
 
 import portage
+
+portage.proxy.lazyimport.lazyimport(
+    globals(),
+    "portage.sync.revision_history:get_repo_revision_history",
+)
 from portage import os
 from portage.progress import ProgressBar
 
@@ -170,6 +175,7 @@ class SyncManager:
         status = None
         taskmaster = TaskHandler(callback=self.do_callback)
         taskmaster.run_tasks(tasks, func, status, options=task_opts)
+        get_repo_revision_history(self.settings["EROOT"], [repo])
 
         if master_hooks or self.updatecache_flg or not 
repo.sync_hooks_only_on_change:
             hooks_enabled = True

diff --git a/lib/portage/sync/meson.build b/lib/portage/sync/meson.build
index a39f1e3cf6..59af12561c 100644
--- a/lib/portage/sync/meson.build
+++ b/lib/portage/sync/meson.build
@@ -4,6 +4,7 @@ py.install_sources(
         'controller.py',
         'getaddrinfo_validate.py',
         'old_tree_timestamp.py',
+        'revision_history.py',
         'syncbase.py',
         '__init__.py',
     ],

diff --git a/lib/portage/sync/revision_history.py b/lib/portage/sync/revision_history.py
new file mode 100644
index 0000000000..3d909d94ee
--- /dev/null
+++ b/lib/portage/sync/revision_history.py
@@ -0,0 +1,133 @@
+# Copyright 2024 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+import json
+import os
+from typing import Optional
+
+import portage
+from portage.locks import lockfile, unlockfile
+from portage.repository.config import RepoConfig
+from portage.util.path import first_existing
+
+_HISTORY_LIMIT = 25
+
+
+def get_repo_revision_history(
+    eroot: str, repos: Optional[list[RepoConfig]] = None
+) -> dict[str, list[str]]:
+    """
+    Get revision history of synced repos. Returns a dict that maps
+    a repo name to list of revisions in descending order by time.
+    If a change is detected and the current process has permission
+    to update the repo_revisions file, then the file will be updated
+    with any newly detected revisions.
+
+    This functions detects revisions which are not yet visible to the
+    current process due to the sync-rcu option.
+
+    @param eroot: EROOT to query
+    @type eroot: string
+    @param repos: list of RepoConfig instances to check for new revisions
+    @type repos: list
+    @rtype: dict
+    @return: mapping of repo name to list of revisions in descending
+             order by time
+    """
+    items = []
+    for repo in repos or ():
+        if repo.volatile:
+            items.append((repo, None))
+            continue
+        if repo.sync_type:
+            try:
+                sync_mod = 
portage.sync.module_controller.get_class(repo.sync_type)
+            except portage.exception.PortageException:
+                continue
+        else:
+            continue
+        repo_location_orig = repo.location
+        try:
+            if repo.user_location is not None:
+                # Temporarily override sync-rcu behavior which pins the
+                # location to a previous snapshot, since we want the
+                # latest available revision here.
+                repo.location = repo.user_location
+            status, repo_revision = sync_mod().retrieve_head(options={"repo": 
repo})
+        except NotImplementedError:
+            repo_revision = None
+        else:
+            repo_revision = repo_revision.strip() if status == os.EX_OK else 
None
+        finally:
+            repo.location = repo_location_orig
+
+        if repo_revision is not None:
+            items.append((repo, repo_revision))
+
+    return _maybe_update_revisions(eroot, items)
+
+
+def _update_revisions(repo_revisions, items):
+    modified = False
+    for repo, repo_revision in items:
+        if repo.volatile:
+            # For volatile repos the revisions may be unordered,
+            # which makes them unusable here where revisions are
+            # intended to be ordered, so discard them.
+            rev_list = repo_revisions.pop(repo.name, None)
+            if rev_list:
+                modified = True
+            continue
+
+        rev_list = repo_revisions.setdefault(repo.name, [])
+        if not rev_list or rev_list[0] != repo_revision:
+            rev_list.insert(0, repo_revision)
+            del rev_list[_HISTORY_LIMIT:]
+            modified = True
+    return modified
+
+
+def _maybe_update_revisions(eroot, items):
+    repo_revisions_file = os.path.join(eroot, portage.const.REPO_REVISIONS)
+    repo_revisions_lock = None
+    try:
+        previous_revisions = None
+        try:
+            with open(repo_revisions_file, encoding="utf8") as f:
+                if os.fstat(f.fileno()).st_size:
+                    previous_revisions = json.load(f)
+        except FileNotFoundError:
+            pass
+
+        repo_revisions = {} if previous_revisions is None else 
previous_revisions.copy()
+        modified = _update_revisions(repo_revisions, items)
+
+        # If modified then do over with lock if permissions allow.
+        if modified and os.access(
+            first_existing(os.path.dirname(repo_revisions_file)), os.W_OK
+        ):
+            # This is a bit redundant since the config._init_dirs method
+            # is supposed to create PRIVATE_PATH with these permissions.
+            portage.util.ensure_dirs(
+                os.path.dirname(repo_revisions_file),
+                gid=portage.data.portage_gid,
+                mode=0o2750,
+                mask=0o2,
+            )
+            repo_revisions_lock = lockfile(repo_revisions_file)
+            previous_revisions = None
+            with open(repo_revisions_file, encoding="utf8") as f:
+                if os.fstat(f.fileno()).st_size:
+                    previous_revisions = json.load(f)
+            repo_revisions = (
+                {} if previous_revisions is None else previous_revisions.copy()
+            )
+            _update_revisions(repo_revisions, items)
+            f = portage.util.atomic_ofstream(repo_revisions_file)
+            json.dump(repo_revisions, f, ensure_ascii=False, sort_keys=True)
+            f.close()
+    finally:
+        if repo_revisions_lock is not None:
+            unlockfile(repo_revisions_lock)
+
+    return repo_revisions

diff --git a/lib/portage/tests/sync/test_sync_local.py b/lib/portage/tests/sync/test_sync_local.py
index aeeb5d0b13..91649398de 100644
--- a/lib/portage/tests/sync/test_sync_local.py
+++ b/lib/portage/tests/sync/test_sync_local.py
@@ -1,7 +1,8 @@
-# Copyright 2014-2023 Gentoo Authors
+# Copyright 2014-2024 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
 import datetime
+import json
 import subprocess
 import sys
 import textwrap
@@ -9,8 +10,9 @@ import textwrap
 import portage
 from portage import os, shutil, _shell_quote
 from portage import _unicode_decode
-from portage.const import PORTAGE_PYM_PATH, TIMESTAMP_FORMAT
+from portage.const import PORTAGE_PYM_PATH, REPO_REVISIONS, TIMESTAMP_FORMAT
 from portage.process import find_binary
+from portage.sync.revision_history import get_repo_revision_history
 from portage.tests import TestCase
 from portage.tests.resolver.ResolverPlayground import ResolverPlayground
 from portage.util import ensure_dirs
@@ -43,6 +45,7 @@ class SyncLocalTestCase(TestCase):
                        sync-rcu = %(sync-rcu)s
                        sync-rcu-store-dir = 
%(EPREFIX)s/var/repositories/test_repo_rcu_storedir
                        auto-sync = %(auto-sync)s
+                       volatile = no
                        %(repo_extra_keys)s
                """
         )
@@ -50,7 +53,7 @@ class SyncLocalTestCase(TestCase):
         profile = {"eapi": ("5",), "package.use.stable.mask": ("dev-libs/A 
flag",)}
 
         ebuilds = {
-            "dev-libs/A-0": {},
+            "dev-libs/A-0": {"EAPI": "8"},
             "sys-apps/portage-3.0": {"IUSE": "+python_targets_python3_8"},
         }
 
@@ -81,7 +84,7 @@ class SyncLocalTestCase(TestCase):
         rcu_store_dir = os.path.join(eprefix, 
"var/repositories/test_repo_rcu_storedir")
 
         cmds = {}
-        for cmd in ("emerge", "emaint"):
+        for cmd in ("egencache", "emerge", "emaint"):
             for bindir in (self.bindir, self.sbindir):
                 path = os.path.join(str(bindir), cmd)
                 if os.path.exists(path):
@@ -298,6 +301,21 @@ class SyncLocalTestCase(TestCase):
                 ),
             ),
             (repo.location, git_cmd + ("init-db",)),
+            # Ensure manifests and cache are valid after
+            # previous calls to alter_ebuild.
+            (
+                homedir,
+                cmds["egencache"]
+                + (
+                    f"--repo={repo.name}",
+                    "--update",
+                    "--update-manifests",
+                    "--sign-manifests=n",
+                    "--strict-manifests=n",
+                    
f"--repositories-configuration={settings['PORTAGE_REPOSITORIES']}",
+                    f"--jobs={portage.util.cpuinfo.get_cpu_count()}",
+                ),
+            ),
             (repo.location, git_cmd + ("add", ".")),
             (repo.location, git_cmd + ("commit", "-a", "-m", "add whole 
repo")),
         )
@@ -314,6 +332,54 @@ class SyncLocalTestCase(TestCase):
             (homedir, lambda: shutil.rmtree(os.path.join(repo.location, 
".git"))),
         )
 
+        def get_revision_history(sync_type="git"):
+            # Override volatile to False here because it gets set
+            # True by RepoConfig when repo.location is not root
+            # or portage owned.
+            try:
+                volatile_orig = repo.volatile
+                repo.volatile = False
+                sync_type_orig = repo.sync_type
+                repo.sync_type = sync_type
+                revision_history = get_repo_revision_history(eroot, 
repos=[repo])
+            finally:
+                repo.sync_type = sync_type_orig
+                repo.volatile = volatile_orig
+
+            return revision_history
+
+        repo_revisions_cmds = (
+            (homedir, lambda: self.assertTrue(bool(get_revision_history()))),
+            (
+                homedir,
+                lambda: self.assertTrue(
+                    os.path.exists(os.path.join(eroot, REPO_REVISIONS))
+                ),
+            ),
+            (homedir, cmds["emaint"] + ("revisions", 
f"--purgerepos={repo.name}")),
+            (
+                homedir,
+                lambda: self.assertFalse(
+                    os.path.exists(os.path.join(eroot, REPO_REVISIONS))
+                ),
+            ),
+            (homedir, lambda: self.assertTrue(bool(get_revision_history()))),
+            (
+                homedir,
+                lambda: self.assertTrue(
+                    os.path.exists(os.path.join(eroot, REPO_REVISIONS))
+                ),
+            ),
+            (homedir, cmds["emaint"] + ("revisions", "--purgeallrepos")),
+            (
+                homedir,
+                lambda: self.assertFalse(
+                    os.path.exists(os.path.join(eroot, REPO_REVISIONS))
+                ),
+            ),
+            (homedir, lambda: self.assertTrue(bool(get_revision_history()))),
+        )
+
         def hg_init_global_config():
             with open(os.path.join(homedir, ".hgrc"), "w") as f:
                 f.write(f"[ui]\nusername = {committer_name} 
<{committer_email}>\n")
@@ -451,6 +517,7 @@ class SyncLocalTestCase(TestCase):
                 + sync_type_git_shallow
                 + upstream_git_commit
                 + sync_cmds
+                + repo_revisions_cmds
                 + mercurial_tests
             ):
                 if hasattr(cmd, "__call__"):

diff --git a/man/emaint.1 b/man/emaint.1
index 2abba9d47b..86d5e89736 100644
--- a/man/emaint.1
+++ b/man/emaint.1
@@ -1,4 +1,4 @@
-.TH "EMAINT" "1" "Feb 2021" "Portage @VERSION@" "Portage"
+.TH "EMAINT" "1" "Mar 2024" "Portage @VERSION@" "Portage"
 .SH NAME
 emaint \- performs package management related system health checks and 
maintenance
 .SH SYNOPSIS
@@ -54,6 +54,11 @@ Perform package move updates for installed packages.
 .br
 OPTIONS: check, fix
 .TP
+.BR revisions
+Purge repo_revisions history file.
+.br
+OPTIONS: purgerepos, purgeallrepos
+.TP
 .BR sync
 Perform sync actions on specified repositories.
 .br
@@ -86,6 +91,13 @@ deleted.
 .TP
 .BR \-y ", " \-\-yes
 Do not prompt for emerge invocations.
+.SH OPTIONS revisions command only
+.TP
+.BR \-\-purgeallrepos
+Purge revisions for all repos.
+.TP
+.BR \-\-purgerepos \ \fIREPO\fR
+Purge revisions for the specified repo(s).
 .SH OPTIONS sync command only
 .TP
 .BR \-a ", " \-\-auto
@@ -121,6 +133,10 @@ Contains the paths and md5sums of all the config files being tracked.
 .B /var/lib/portage/failed-merges
 Contains the packages and timestamps of any failed merges being cleaned from
 the system, and to be re-emerged.
+.TP
+.B /var/lib/portage/repo_revisions
+Contains the most recent repository revisions obtained via either
+\fBemaint sync\fR or \fBemerge \-\-sync\fR.
 .SH "SEE ALSO"
 .BR emerge (1),
 .BR portage (5)

diff --git a/man/portage.5 b/man/portage.5
index 3b8329bfb2..66437d8f8a 100644
--- a/man/portage.5
+++ b/man/portage.5
@@ -134,6 +134,7 @@ database to track installed packages
 .BR /var/lib/portage/
 .nf
 config
+repo_revisions
 world
 world_sets
 .fi
@@ -1901,6 +1902,20 @@ Hashes which are used to determine whether files in config protected
 directories have been modified since being installed.  Files which have not
 been modified will automatically be unmerged.
 .TP
+.BR repo_revisions
+Contains the most recent repository revisions obtained via either
+\fBemaint sync\fR or \fBemerge \-\-sync\fR. The format is a JSON
+object which maps a repo name to list of revisions in descending
+order by time. In cases when revisions are not ordered by time,
+the volatile attribute should be set in \fBrepos.conf\fR in order
+to prevent unordered revisions from being stored in the
+repo_revisions file. The \fBemaint revisions\fR command can be
+used to purge revisions for specific repos, which should be done
+in any case when there is a need to roll back to an older
+revision (the \fBemerge\-webrsync \-\-revert\fR option calls
+\fBemaint revisions\fR in order to purge all revision history
+for the repository).
+.TP
 .BR world
 Every time you emerge a package, the package that you requested is
 recorded here.  Then when you run `emerge world \-up`, the list of

Reply via email to