This adds an egencache --update-pkg-desc-index action which generates
a plain-text index of package names, versions, and descriptions. The
index can then be used to optimize emerge --search / --searchdesc
actions. If the package description index is missing from a particular
repository, then all metadata for that repository is obtained using the
normal portdbapi.aux_get method.

Searching of installed packages is optimized to take advantage of
vardbapi._aux_cache, which is backed by vardb_metadata.pickle.
See the IndexedVardb docstring for some more details.

X-Gentoo-Bug: 525718
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718
---
 bin/egencache         |  43 ++++++++++-
 man/egencache.1       |   4 ++
 man/portage.5         |   6 ++
 pym/_emerge/search.py | 196 ++++++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 232 insertions(+), 17 deletions(-)

diff --git a/bin/egencache b/bin/egencache
index e366058..90d5e68 100755
--- a/bin/egencache
+++ b/bin/egencache
@@ -57,7 +57,7 @@ from portage.util._async.run_main_scheduler import 
run_main_scheduler
 from portage.util._eventloop.global_event_loop import global_event_loop
 from portage import cpv_getkey
 from portage.dep import Atom, isjustname
-from portage.versions import pkgsplit, vercmp
+from portage.versions import pkgsplit, vercmp, _pkg_str
 
 try:
        from xml.etree import ElementTree
@@ -91,6 +91,9 @@ def parse_args(args):
        actions.add_argument("--update-changelogs",
                action="store_true",
                help="update the ChangeLog files from SCM logs")
+       actions.add_argument("--update-pkg-desc-index",
+               action="store_true",
+               help="update package description index")
        actions.add_argument("--update-manifests",
                action="store_true",
                help="update manifests")
@@ -451,6 +454,35 @@ class GenCache(object):
                if hasattr(trg_cache, '_prune_empty_dirs'):
                        trg_cache._prune_empty_dirs()
 
+class GenPkgDescIndex(object):
+       def __init__(self, portdb, output_file):
+               self.returncode = os.EX_OK
+               self._portdb = portdb
+               self._output_file = output_file
+
+       def run(self):
+
+               portage.util.ensure_dirs(os.path.dirname(self._output_file))
+               f = portage.util.atomic_ofstream(self._output_file,
+                       encoding=_encodings["repo.content"])
+
+               portdb = self._portdb
+               for cp in portdb.cp_all():
+                       pkgs = portdb.cp_list(cp)
+                       if not pkgs:
+                               continue
+                       desc, = portdb.aux_get(pkgs[-1], ["DESCRIPTION"])
+
+                       if len(pkgs) == 1:
+                               output = "%s: %s\n" % (pkgs[0], desc)
+                       else:
+                               output = "%s,%s: %s\n" % (pkgs[0],
+                                       ",".join(_pkg_str(cpv).version
+                                       for cpv in pkgs[1:]), desc)
+                       f.write(output)
+
+               f.close()
+
 class GenUseLocalDesc(object):
        def __init__(self, portdb, output=None,
                        preserve_comments=False):
@@ -893,7 +925,8 @@ def egencache_main(args):
                        local_config=False, env=env)
 
        if not (options.update or options.update_use_local_desc or
-                       options.update_changelogs or options.update_manifests):
+                       options.update_changelogs or options.update_manifests or
+                       options.update_pkg_desc_index):
                parser.error('No action specified')
                return 1
 
@@ -1057,6 +1090,12 @@ def egencache_main(args):
                else:
                        ret.append(scheduler.returncode)
 
+       if options.update_pkg_desc_index:
+               gen_index = GenPkgDescIndex(portdb, os.path.join(
+                       repo_config.location, "metadata", "pkg_desc_index"))
+               gen_index.run()
+               ret.append(gen_index.returncode)
+
        if options.update_use_local_desc:
                gen_desc = GenUseLocalDesc(portdb,
                        output=options.uld_output,
diff --git a/man/egencache.1 b/man/egencache.1
index f71feb3..3a3197f 100644
--- a/man/egencache.1
+++ b/man/egencache.1
@@ -19,6 +19,10 @@ for the details on package atom syntax.
 .BR "\-\-update\-changelogs"
 Update the ChangeLog files from SCM logs (supported only in git repos).
 .TP
+.BR "\-\-update\-pkg\-desc\-index"
+Update the package description index which is located at
+\fImetadata/pkg_desc_index\fR in the repository.
+.TP
 .BR "\-\-update\-use\-local\-desc"
 Update the \fIprofiles/use.local.desc\fR file from metadata.xml.
 .TP
diff --git a/man/portage.5 b/man/portage.5
index e399f0f..26856d1 100644
--- a/man/portage.5
+++ b/man/portage.5
@@ -75,6 +75,7 @@ user\-defined package sets
 .BR /usr/portage/metadata/
 .nf
 layout.conf
+pkg_desc_index
 .fi
 .TP
 .BR /usr/portage/profiles/
@@ -1110,6 +1111,11 @@ cache\-formats = md5-dict pms
 profile\-formats = portage-2
 .fi
 .RE
+.TP
+.BR pkg_desc_index
+This is an index of packages and descriptions which may be generated
+by \fBegencache\fR(1) in order to optimize \fBemerge\fR(1) search
+actions.
 .RE
 .TP
 .BR /usr/portage/profiles/
diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py
index 4b0fd9f..bf15f11 100644
--- a/pym/_emerge/search.py
+++ b/pym/_emerge/search.py
@@ -3,13 +3,17 @@
 
 from __future__ import print_function
 
+import io
 import re
 import portage
-from portage import os
+from portage import os, _encodings
 from portage.dbapi.porttree import _parse_uri_map
+from portage.dep import Atom
+from portage.exception import InvalidData
 from portage.localization import localized_size
 from portage.output import  bold, bold as white, darkgreen, green, red
 from portage.util import writemsg_stdout
+from portage.versions import _pkg_str
 
 from _emerge.Package import Package
 
@@ -30,7 +34,6 @@ class search(object):
                The list of available and installed packages is created at 
object instantiation.
                This makes successive searches faster."""
                self.settings = root_config.settings
-               self.vartree = root_config.trees["vartree"]
                self.spinner = spinner
                self.verbose = verbose
                self.searchdesc = searchdesc
@@ -41,9 +44,9 @@ class search(object):
 
                self._dbs = []
 
-               portdb = root_config.trees["porttree"].dbapi
+               portdb = IndexedPortdb(root_config.trees["porttree"].dbapi)
                bindb = root_config.trees["bintree"].dbapi
-               vardb = root_config.trees["vartree"].dbapi
+               vardb = IndexedVardb(root_config.trees["vartree"].dbapi)
 
                if not usepkgonly and portdb._have_root_eclass_dir:
                        self._dbs.append(portdb)
@@ -53,6 +56,7 @@ class search(object):
 
                self._dbs.append(vardb)
                self._portdb = portdb
+               self._vardb = vardb
 
        def _spinner_update(self):
                if self.spinner:
@@ -97,7 +101,7 @@ class search(object):
                return {}
 
        def _visible(self, db, cpv, metadata):
-               installed = db is self.vartree.dbapi
+               installed = db is self._vardb
                built = installed or db is not self._portdb
                pkg_type = "ebuild"
                if installed:
@@ -208,6 +212,20 @@ class search(object):
                                        masked=1
                                self.matches["pkg"].append([package,masked])
                        elif self.searchdesc: # DESCRIPTION searching
+                               # Check for DESCRIPTION match first, so that we 
can skip
+                               # the expensive visibility check if it doesn't 
match.
+                               full_package = portage.best(
+                                       self._xmatch("match-all", package))
+                               try:
+                                       full_desc = self._aux_get(
+                                               full_package, 
["DESCRIPTION"])[0]
+                               except KeyError:
+                                       portage.writemsg(
+                                               "emerge: search: aux_get() 
failed, skipping\n",
+                                               noiselevel=-1)
+                                       continue
+                               if not self.searchre.search(full_desc):
+                                       continue
                                full_package = 
self._xmatch("bestmatch-visible", package)
                                if not full_package:
                                        #no match found; we don't want to query 
description
@@ -217,14 +235,8 @@ class search(object):
                                                continue
                                        else:
                                                masked=1
-                               try:
-                                       full_desc = self._aux_get(
-                                               full_package, 
["DESCRIPTION"])[0]
-                               except KeyError:
-                                       print("emerge: search: aux_get() 
failed, skipping")
-                                       continue
-                               if self.searchre.search(full_desc):
-                                       
self.matches["desc"].append([full_package,masked])
+
+                               self.matches["desc"].append((full_package, 
masked))
 
                self.sdict = self.setconfig.getSets()
                for setname in self.sdict:
@@ -262,7 +274,7 @@ class search(object):
                        bold(self.searchkey) + " ]\n")
                msg.append("[ Applications found : " + \
                        bold(str(self.mlen)) + " ]\n\n")
-               vardb = self.vartree.dbapi
+               vardb = self._vardb
                metadata_keys = set(Package.metadata_keys)
                metadata_keys.update(["DESCRIPTION", "HOMEPAGE", "LICENSE", 
"SRC_URI"])
                metadata_keys = tuple(metadata_keys)
@@ -372,7 +384,11 @@ class search(object):
        # private interface
        #
        def getInstallationStatus(self,package):
-               installed_package = self.vartree.dep_bestmatch(package)
+               installed_package = self._vardb.match(package)
+               if installed_package:
+                       installed_package = installed_package[-1]
+               else:
+                       installed_package = ""
                result = ""
                version = 
self.getVersion(installed_package,search.VERSION_RELEASE)
                if len(version) > 0:
@@ -392,3 +408,153 @@ class search(object):
                        result = ""
                return result
 
+
+class IndexedPortdb(object):
+       """
+       A portdbapi interface that uses a package description index to
+       improve performance. If the description index is missing for a
+       particular repository, then all metadata for that repository is
+       obtained using the normal portdbapi.aux_get method.
+       """
+       def __init__(self, portdb):
+               self._portdb = portdb
+               self.cpv_exists = portdb.cpv_exists
+               self.getFetchMap = portdb.getFetchMap
+               self.findname = portdb.findname
+               self._aux_cache_keys = portdb._aux_cache_keys
+               self._have_root_eclass_dir = portdb._have_root_eclass_dir
+               self._cpv_sort_ascending = portdb._cpv_sort_ascending
+               self._desc_cache = None
+               self._cp_map = None
+
+       def _init_index(self):
+               cp_map = {}
+               desc_cache = {}
+               for repo_path in self._portdb.porttrees:
+                       outside_repo = os.path.join(self._portdb.depcachedir,
+                               repo_path.lstrip(os.sep))
+                       for parent_dir in (repo_path, outside_repo):
+                               file_path = os.path.join(parent_dir,
+                                       "metadata", "pkg_desc_index")
+
+                               try:
+                                       with io.open(file_path,
+                                               
encoding=_encodings["repo.content"]) as f:
+                                               for line in f:
+                                                       pkgs, desc = 
line.split(":", 1)
+                                                       desc = desc.strip()
+                                                       pkgs = pkgs.split(",")
+                                                       if not pkgs[0]:
+                                                               continue
+                                                       try:
+                                                               pkg = 
_pkg_str(pkgs[0])
+                                                       except InvalidData:
+                                                               continue
+                                                       cp_list = 
cp_map.get(pkg.cp)
+                                                       if cp_list is None:
+                                                               cp_list = []
+                                                               cp_map[pkg.cp] 
= cp_list
+                                                       cp_list.append(pkg)
+                                                       for ver in pkgs[1:]:
+                                                               try:
+                                                                       
cp_list.append(
+                                                                               
_pkg_str(pkg.cp + "-" + ver))
+                                                               except 
InvalidData:
+                                                                       pass
+                                                       for cpv in cp_list:
+                                                               desc_cache[cpv] 
= desc
+                               except IOError:
+                                       pass
+                               else:
+                                       break
+                       else:
+                               # No descriptions index was found, so populate
+                               # cp_map the slow way.
+                               for cp in 
self._portdb.cp_all(trees=[repo_path]):
+                                       cp_list = cp_map.get(cp)
+                                       if cp_list is None:
+                                               cp_list = []
+                                               cp_map[cp] = cp_list
+                                       for cpv in self._portdb.cp_list(cp, 
mytree=repo_path):
+                                               if cpv not in cp_list:
+                                                       
cp_list.append(_pkg_str(cpv))
+
+               self._desc_cache = desc_cache
+               self._cp_map = cp_map
+
+       def cp_all(self):
+               if self._cp_map is None:
+                       self._init_index()
+               return list(self._cp_map)
+
+       def match(self, atom):
+               if not isinstance(atom, Atom):
+                       atom = Atom(atom)
+               cp_list = self._cp_map.get(atom.cp)
+               if cp_list is None:
+                       return []
+               self._portdb._cpv_sort_ascending(cp_list)
+               return portage.match_from_list(atom, cp_list)
+
+       def aux_get(self, cpv, attrs, myrepo = None):
+               if len(attrs) == 1 and attrs[0] == "DESCRIPTION":
+                       try:
+                               return [self._desc_cache[cpv]]
+                       except KeyError:
+                               pass
+               return self._portdb.aux_get(cpv, attrs)
+
+
+class IndexedVardb(object):
+       """
+       A vardbapi interface that sacrifices validation in order to
+       improve performance. It takes advantage of vardbapi._aux_cache,
+       which is backed by vardb_metadata.pickle. Since _aux_cache is
+       not updated for every single merge/unmerge (see
+       _aux_cache_threshold), the list of packages is obtained directly
+       from the real vardbapi instance. If a package is missing from
+       _aux_cache, then its metadata is obtained using the normal
+       (validated) vardbapi.aux_get method.
+       """
+       def __init__(self, vardb):
+               self._vardb = vardb
+               self._aux_cache_keys = vardb._aux_cache_keys
+               self._cpv_sort_ascending = vardb._cpv_sort_ascending
+               self._cp_map = {}
+               self.cpv_exists = vardb.cpv_exists
+
+       def cp_all(self):
+               if self._cp_map:
+                       return list(self._cp_map)
+               cp_map = self._cp_map
+               for cpv in self._vardb.cpv_all():
+                       cp = portage.cpv_getkey(cpv)
+                       if cp is not None:
+                               cp_list = cp_map.get(cp)
+                               if cp_list is None:
+                                       cp_list = []
+                                       cp_map[cp] = cp_list
+                               cp_list.append(_pkg_str(cpv))
+               return list(cp_map)
+
+       def match(self, atom):
+               if not isinstance(atom, Atom):
+                       atom = Atom(atom)
+               cp_list = self._cp_map.get(atom.cp)
+               if cp_list is None:
+                       return []
+               self._vardb._cpv_sort_ascending(cp_list)
+               return portage.match_from_list(atom, cp_list)
+
+       def aux_get(self, cpv, attrs, myrepo = None):
+               pkg_data = self._vardb._aux_cache["packages"].get(cpv)
+               if not isinstance(pkg_data, tuple) or \
+                       len(pkg_data) != 2 or \
+                       not isinstance(pkg_data[1], dict):
+                       pkg_data = None
+               if pkg_data is None:
+                       # It may be missing from _aux_cache due to
+                       # _aux_cache_threshold.
+                       return self._vardb.aux_get(cpv, attrs)
+               metadata = pkg_data[1]
+               return [metadata.get(k, "") for k in attrs]
-- 
2.0.4

Reply via email to