Add --jobs and --load-average options which allow dependency checks
for multiple profiles to run in parallel. The speedup does not scale
linearly with the number of jobs, but it can be worthwhile
(I measured a 35% decrease in time when running 'repoman -j8 full'
on sys-apps/portage). For the -j1 case (default), all dependency
checks run in the main process as usual, so there is no significant
performance penalty for the default case.

Bug: https://bugs.gentoo.org/448462
Signed-off-by: Zac Medico <zmed...@gentoo.org>
---
 repoman/lib/repoman/argparser.py              |   9 ++
 .../repoman/modules/scan/depend/profile.py    | 117 +++++++++++++++---
 repoman/man/repoman.1                         |   9 +-
 3 files changed, 116 insertions(+), 19 deletions(-)

diff --git a/repoman/lib/repoman/argparser.py b/repoman/lib/repoman/argparser.py
index 670a0e91d..6d545ccca 100644
--- a/repoman/lib/repoman/argparser.py
+++ b/repoman/lib/repoman/argparser.py
@@ -199,6 +199,15 @@ def parse_args(argv, repoman_default_opts):
                '--output-style', dest='output_style', choices=output_keys,
                help='select output type', default='default')
 
+       parser.add_argument(
+               '-j', '--jobs', dest='jobs', action='store', type=int, 
default=1,
+               help='Specifies the number of jobs (processes) to run 
simultaneously.')
+
+       parser.add_argument(
+               '-l', '--load-average', dest='load_average', action='store', 
type=float, default=None,
+               help='Specifies that no new jobs (processes) should be started 
if there are others '
+                       'jobs running and the load average is at least load (a 
floating-point number).')
+
        parser.add_argument(
                '--mode', dest='mode', choices=mode_keys,
                help='specify which mode repoman will run in (default=full)')
diff --git a/repoman/lib/repoman/modules/scan/depend/profile.py 
b/repoman/lib/repoman/modules/scan/depend/profile.py
index 39d8b550c..1eb69422a 100644
--- a/repoman/lib/repoman/modules/scan/depend/profile.py
+++ b/repoman/lib/repoman/modules/scan/depend/profile.py
@@ -2,7 +2,9 @@
 
 
 import copy
+import functools
 import os
+import types
 from pprint import pformat
 
 from _emerge.Package import Package
@@ -15,6 +17,10 @@ from repoman.modules.scan.depend._gen_arches import 
_gen_arches
 from portage.dep import Atom
 from portage.package.ebuild.profile_iuse import iter_iuse_vars
 from portage.util import getconfig
+from portage.util.futures import asyncio
+from portage.util.futures.compat_coroutine import coroutine, coroutine_return
+from portage.util.futures.executor.fork import ForkExecutor
+from portage.util.futures.iter_completed import async_iter_completed
 
 
 def sort_key(item):
@@ -58,16 +64,14 @@ class ProfileDependsChecks(ScanBase):
        def check(self, **kwargs):
                '''Perform profile dependant dependency checks
 
-               @param arches:
                @param pkg: Package in which we check (object).
                @param ebuild: Ebuild which we check (object).
-               @param baddepsyntax: boolean
-               @param unknown_pkgs: set of tuples (type, atom.unevaluated_atom)
                @returns: dictionary
                '''
                ebuild = kwargs.get('ebuild').get()
                pkg = kwargs.get('pkg').get()
-               unknown_pkgs, baddepsyntax = _depend_checks(
+
+               ebuild.unknown_pkgs, ebuild.baddepsyntax = _depend_checks(
                        ebuild, pkg, self.portdb, self.qatracker, 
self.repo_metadata,
                        self.repo_settings.qadata)
 
@@ -90,8 +94,64 @@ class ProfileDependsChecks(ScanBase):
                                relevant_profiles.append((keyword, groups, 
prof))
 
                relevant_profiles.sort(key=sort_key)
+               ebuild.relevant_profiles = relevant_profiles
+
+               if self.options.jobs <= 1:
+                       for task in self._iter_tasks(None, None, ebuild, pkg):
+                               task, results = task
+                               for result in results:
+                                       self._check_result(task, result)
+
+               loop = asyncio._wrap_loop()
+               loop.run_until_complete(self._async_check(loop=loop, **kwargs))
+
+               return False
+
+       @coroutine
+       def _async_check(self, loop=None, **kwargs):
+               '''Perform async profile dependant dependency checks
+
+               @param arches:
+               @param pkg: Package in which we check (object).
+               @param ebuild: Ebuild which we check (object).
+               @param baddepsyntax: boolean
+               @param unknown_pkgs: set of tuples (type, atom.unevaluated_atom)
+               @returns: dictionary
+               '''
+               loop = asyncio._wrap_loop(loop)
+               ebuild = kwargs.get('ebuild').get()
+               pkg = kwargs.get('pkg').get()
+               unknown_pkgs = ebuild.unknown_pkgs
+               baddepsyntax = ebuild.baddepsyntax
+
+               # Use max_workers=True to ensure immediate fork, since 
_iter_tasks
+               # needs the fork to create a snapshot of current state.
+               executor = ForkExecutor(max_workers=self.options.jobs)
+
+               if self.options.jobs > 1:
+                       for future_done_set in 
async_iter_completed(self._iter_tasks(loop, executor, ebuild, pkg),
+                               max_jobs=self.options.jobs, 
max_load=self.options.load_average, loop=loop):
+                               for task in (yield future_done_set):
+                                       task, results = task.result()
+                                       for result in results:
+                                               self._check_result(task, result)
+
+               if not baddepsyntax and unknown_pkgs:
+                       type_map = {}
+                       for mytype, atom in unknown_pkgs:
+                               type_map.setdefault(mytype, set()).add(atom)
+                       for mytype, atoms in type_map.items():
+                               self.qatracker.add_error(
+                                       "dependency.unknown", "%s: %s: %s"
+                                       % (ebuild.relative_path, mytype, ", 
".join(sorted(atoms))))
 
-               for keyword, groups, prof in relevant_profiles:
+       @coroutine
+       def _task(self, task):
+               yield task.future
+               coroutine_return((task, task.future.result()))
+
+       def _iter_tasks(self, loop, executor, ebuild, pkg):
+               for keyword, groups, prof in ebuild.relevant_profiles:
 
                        is_stable_profile = prof.status == "stable"
                        is_dev_profile = prof.status == "dev" and \
@@ -154,6 +214,22 @@ class ProfileDependsChecks(ScanBase):
                        dep_settings.usemask = 
dep_settings._use_manager.getUseMask(
                                pkg, stable=dep_settings._parent_stable)
 
+                       task = types.SimpleNamespace(ebuild=ebuild, prof=prof, 
keyword=keyword)
+
+                       target = functools.partial(self._task_subprocess, task, 
pkg, dep_settings)
+
+                       if self.options.jobs <= 1:
+                               yield (task, target())
+                       else:
+                               task.future = 
asyncio.ensure_future(loop.run_in_executor(executor, target), loop=loop)
+                               yield self._task(task)
+
+
+       def _task_subprocess(self, task, pkg, dep_settings):
+                       ebuild = task.ebuild
+                       baddepsyntax = ebuild.baddepsyntax
+                       results = []
+                       prof = task.prof
                        if not baddepsyntax:
                                ismasked = not ebuild.archs or \
                                        pkg.cpv not in 
self.portdb.xmatch("match-visible",
@@ -163,7 +239,7 @@ class ProfileDependsChecks(ScanBase):
                                                self.have['pmasked'] = 
bool(dep_settings._getMaskAtom(
                                                        pkg.cpv, 
ebuild.metadata))
                                        if self.options.ignore_masked:
-                                               continue
+                                               return results
                                        # we are testing deps for a masked 
package; give it some lee-way
                                        suffix = "masked"
                                        matchmode = "minimum-all-ignore-profile"
@@ -191,6 +267,22 @@ class ProfileDependsChecks(ScanBase):
                                                myvalue, self.portdb, 
dep_settings,
                                                use="all", mode=matchmode, 
trees=self.repo_settings.trees)
 
+                                       
results.append(types.SimpleNamespace(atoms=atoms, success=success, mykey=mykey, 
mytype=mytype))
+
+                       return results
+
+
+       def _check_result(self, task, result):
+                                       prof = task.prof
+                                       keyword = task.keyword
+                                       ebuild = task.ebuild
+                                       unknown_pkgs = ebuild.unknown_pkgs
+
+                                       success = result.success
+                                       atoms = result.atoms
+                                       mykey = result.mykey
+                                       mytype = result.mytype
+
                                        if success:
                                                if atoms:
 
@@ -223,7 +315,7 @@ class ProfileDependsChecks(ScanBase):
 
                                                        # if we emptied out our 
list, continue:
                                                        if not all_atoms:
-                                                               continue
+                                                               return
 
                                                        # Filter out 
duplicates.  We do this by hand (rather
                                                        # than use a set) so 
the order is stable and better
@@ -255,17 +347,6 @@ class ProfileDependsChecks(ScanBase):
                                                                % 
(ebuild.relative_path, mytype, keyword,
                                                                        prof, 
pformat(atoms, indent=6)))
 
-               if not baddepsyntax and unknown_pkgs:
-                       type_map = {}
-                       for mytype, atom in unknown_pkgs:
-                               type_map.setdefault(mytype, set()).add(atom)
-                       for mytype, atoms in type_map.items():
-                               self.qatracker.add_error(
-                                       "dependency.unknown", "%s: %s: %s"
-                                       % (ebuild.relative_path, mytype, ", 
".join(sorted(atoms))))
-
-               return False
-
        @property
        def runInEbuilds(self):
                '''Ebuild level scans'''
diff --git a/repoman/man/repoman.1 b/repoman/man/repoman.1
index a6a9937e5..6f9a24544 100644
--- a/repoman/man/repoman.1
+++ b/repoman/man/repoman.1
@@ -1,4 +1,4 @@
-.TH "REPOMAN" "1" "Mar 2018" "Repoman VERSION" "Repoman"
+.TH "REPOMAN" "1" "Aug 2020" "Repoman VERSION" "Repoman"
 .SH NAME
 repoman \- Gentoo's program to enforce a minimal level of quality assurance in
 packages added to the ebuild repository
@@ -83,6 +83,13 @@ Be less verbose about extraneous info
 \fB-p\fR, \fB--pretend\fR
 Don't commit or fix anything; just show what would be done
 .TP
+\fB\-j\fR, \fB\-\-jobs\fR
+Specifies the number of jobs (processes) to run simultaneously.
+.TP
+\fB\-l\fR, \fB\-\-load-average\fR
+Specifies that no new jobs (processes) should be started if there are other
+jobs running and the load average is at least load (a floating\-point number).
+.TP
 \fB-x\fR, \fB--xmlparse\fR
 Forces the metadata.xml parse check to be carried out
 .TP
-- 
2.25.3


Reply via email to