On Sun, 16 Aug 2020 20:26:56 -0700 Zac Medico <zmed...@gentoo.org> wrote:
> Add --jobs and --load-average options which allow dependency checks > for multiple profiles to run in parallel. The increase in performance > is non-linear for the number of jobs, but it can be worthwhile > (I measured a 35% decrease in time when running 'repoman -j8 full' > on sys-apps/portage). For the -j1 case (default), all dependency > checks run in the main process as usual, so there is no significant > performance penalty for the default case. > > Bug: https://bugs.gentoo.org/448462 > Signed-off-by: Zac Medico <zmed...@gentoo.org> > --- > repoman/lib/repoman/argparser.py | 9 ++ > .../repoman/modules/scan/depend/profile.py | 117 > +++++++++++++++--- repoman/man/repoman.1 | > 9 +- 3 files changed, 116 insertions(+), 19 deletions(-) > > diff --git a/repoman/lib/repoman/argparser.py > b/repoman/lib/repoman/argparser.py index 670a0e91d..6d545ccca 100644 > --- a/repoman/lib/repoman/argparser.py > +++ b/repoman/lib/repoman/argparser.py > @@ -199,6 +199,15 @@ def parse_args(argv, repoman_default_opts): > '--output-style', dest='output_style', > choices=output_keys, help='select output type', default='default') > > + parser.add_argument( > + '-j', '--jobs', dest='jobs', action='store', > type=int, default=1, > + help='Specifies the number of jobs (processes) to > run simultaneously.') + > + parser.add_argument( > + '-l', '--load-average', dest='load_average', > action='store', type=float, default=None, > + help='Specifies that no new jobs (processes) should > be started if there are others ' > + 'jobs running and the load average is at > least load (a floating-point number).') + > parser.add_argument( > '--mode', dest='mode', choices=mode_keys, > help='specify which mode repoman will run in > (default=full)') diff --git > a/repoman/lib/repoman/modules/scan/depend/profile.py > b/repoman/lib/repoman/modules/scan/depend/profile.py index > 39d8b550c..1eb69422a 100644 --- > a/repoman/lib/repoman/modules/scan/depend/profile.py +++ > 
b/repoman/lib/repoman/modules/scan/depend/profile.py @@ -2,7 +2,9 @@ > > import copy > +import functools > import os > +import types > from pprint import pformat > > from _emerge.Package import Package > @@ -15,6 +17,10 @@ from repoman.modules.scan.depend._gen_arches > import _gen_arches from portage.dep import Atom > from portage.package.ebuild.profile_iuse import iter_iuse_vars > from portage.util import getconfig > +from portage.util.futures import asyncio > +from portage.util.futures.compat_coroutine import coroutine, > coroutine_return +from portage.util.futures.executor.fork import > ForkExecutor +from portage.util.futures.iter_completed import > async_iter_completed > > def sort_key(item): > @@ -58,16 +64,14 @@ class ProfileDependsChecks(ScanBase): > def check(self, **kwargs): > '''Perform profile dependant dependency checks > > - @param arches: > @param pkg: Package in which we check (object). > @param ebuild: Ebuild which we check (object). > - @param baddepsyntax: boolean > - @param unknown_pkgs: set of tuples (type, > atom.unevaluated_atom) @returns: dictionary > ''' > ebuild = kwargs.get('ebuild').get() > pkg = kwargs.get('pkg').get() > - unknown_pkgs, baddepsyntax = _depend_checks( > + > + ebuild.unknown_pkgs, ebuild.baddepsyntax = > _depend_checks( ebuild, pkg, self.portdb, self.qatracker, > self.repo_metadata, self.repo_settings.qadata) > > @@ -90,8 +94,64 @@ class ProfileDependsChecks(ScanBase): > relevant_profiles.append((keyword, > groups, prof)) > relevant_profiles.sort(key=sort_key) > + ebuild.relevant_profiles = relevant_profiles > + > + if self.options.jobs <= 1: > + for task in self._iter_tasks(None, None, > ebuild, pkg): > + task, results = task > + for result in results: > + self._check_result(task, > result) + > + loop = asyncio._wrap_loop() > + loop.run_until_complete(self._async_check(loop=loop, > **kwargs)) + > + return False > + > + @coroutine > + def _async_check(self, loop=None, **kwargs): > + '''Perform async profile dependant 
dependency checks > + > + @param arches: > + @param pkg: Package in which we check (object). > + @param ebuild: Ebuild which we check (object). > + @param baddepsyntax: boolean > + @param unknown_pkgs: set of tuples (type, > atom.unevaluated_atom) > + @returns: dictionary > + ''' > + loop = asyncio._wrap_loop(loop) > + ebuild = kwargs.get('ebuild').get() > + pkg = kwargs.get('pkg').get() > + unknown_pkgs = ebuild.unknown_pkgs > + baddepsyntax = ebuild.baddepsyntax > + > + # Use max_workers=True to ensure immediate fork, > since _iter_tasks > + # needs the fork to create a snapshot of current > state. > + executor = > ForkExecutor(max_workers=self.options.jobs) + > + if self.options.jobs > 1: > + for future_done_set in > async_iter_completed(self._iter_tasks(loop, executor, ebuild, pkg), > + max_jobs=self.options.jobs, > max_load=self.options.load_average, loop=loop): > + for task in (yield future_done_set): > + task, results = task.result() > + for result in results: > + > self._check_result(task, result) + > + if not baddepsyntax and unknown_pkgs: > + type_map = {} > + for mytype, atom in unknown_pkgs: > + type_map.setdefault(mytype, > set()).add(atom) > + for mytype, atoms in type_map.items(): > + self.qatracker.add_error( > + "dependency.unknown", "%s: > %s: %s" > + % (ebuild.relative_path, > mytype, ", ".join(sorted(atoms)))) > - for keyword, groups, prof in relevant_profiles: > + @coroutine > + def _task(self, task): > + yield task.future > + coroutine_return((task, task.future.result())) > + > + def _iter_tasks(self, loop, executor, ebuild, pkg): > + for keyword, groups, prof in > ebuild.relevant_profiles: > is_stable_profile = prof.status == "stable" > is_dev_profile = prof.status == "dev" and \ > @@ -154,6 +214,22 @@ class ProfileDependsChecks(ScanBase): > dep_settings.usemask = > dep_settings._use_manager.getUseMask( pkg, > stable=dep_settings._parent_stable) > + task = types.SimpleNamespace(ebuild=ebuild, > prof=prof, keyword=keyword) + > + target = > 
functools.partial(self._task_subprocess, task, pkg, dep_settings) + > + if self.options.jobs <= 1: > + yield (task, target()) > + else: > + task.future = > asyncio.ensure_future(loop.run_in_executor(executor, target), > loop=loop) > + yield self._task(task) > + > + > + def _task_subprocess(self, task, pkg, dep_settings): > + ebuild = task.ebuild > + baddepsyntax = ebuild.baddepsyntax > + results = [] > + prof = task.prof > if not baddepsyntax: > ismasked = not ebuild.archs or \ > pkg.cpv not in > self.portdb.xmatch("match-visible", @@ -163,7 +239,7 @@ class > ProfileDependsChecks(ScanBase): self.have['pmasked'] = > bool(dep_settings._getMaskAtom( pkg.cpv, ebuild.metadata)) > if > self.options.ignore_masked: > - continue > + return results > # we are testing deps for a > masked package; give it some lee-way suffix = "masked" > matchmode = > "minimum-all-ignore-profile" @@ -191,6 +267,22 @@ class > ProfileDependsChecks(ScanBase): myvalue, self.portdb, dep_settings, > use="all", > mode=matchmode, trees=self.repo_settings.trees) > + > results.append(types.SimpleNamespace(atoms=atoms, success=success, > mykey=mykey, mytype=mytype)) + > + return results > + > + > + def _check_result(self, task, result): > + prof = task.prof > + keyword = task.keyword > + ebuild = task.ebuild > + unknown_pkgs = > ebuild.unknown_pkgs + > + success = result.success > + atoms = result.atoms > + mykey = result.mykey > + mytype = result.mytype > + > if success: > if atoms: > > @@ -223,7 +315,7 @@ class ProfileDependsChecks(ScanBase): > > # if we > emptied out our list, continue: if not all_atoms: > - > continue > + > return > # Filter out > duplicates. 
We do this by hand (rather # than use a set) so the > order is stable and better @@ -255,17 +347,6 @@ class > ProfileDependsChecks(ScanBase): % (ebuild.relative_path, mytype, > keyword, prof, pformat(atoms, indent=6))) > > - if not baddepsyntax and unknown_pkgs: > - type_map = {} > - for mytype, atom in unknown_pkgs: > - type_map.setdefault(mytype, > set()).add(atom) > - for mytype, atoms in type_map.items(): > - self.qatracker.add_error( > - "dependency.unknown", "%s: > %s: %s" > - % (ebuild.relative_path, > mytype, ", ".join(sorted(atoms)))) - > - return False > - > @property > def runInEbuilds(self): > '''Ebuild level scans''' > diff --git a/repoman/man/repoman.1 b/repoman/man/repoman.1 > index a6a9937e5..6f9a24544 100644 > --- a/repoman/man/repoman.1 > +++ b/repoman/man/repoman.1 > @@ -1,4 +1,4 @@ > -.TH "REPOMAN" "1" "Mar 2018" "Repoman VERSION" "Repoman" > +.TH "REPOMAN" "1" "Aug 2020" "Repoman VERSION" "Repoman" > .SH NAME > repoman \- Gentoo's program to enforce a minimal level of quality > assurance in packages added to the ebuild repository > @@ -83,6 +83,13 @@ Be less verbose about extraneous info > \fB-p\fR, \fB--pretend\fR > Don't commit or fix anything; just show what would be done > .TP > +\fB\-j\fR, \fB\-\-jobs\fR > +Specifies the number of jobs (processes) to run simultaneously. > +.TP > +\fB\-l\fR, \fB\-\-load-average\fR > +Specifies that no new jobs (processes) should be started if there > are others +jobs running and the load average is at least load (a > floating\-point number). +.TP > \fB-x\fR, \fB--xmlparse\fR > Forces the metadata.xml parse check to be carried out > .TP code looks good to me