In poking around the yum code, I noticed that 'yum install foo' can be
greatly sped up (assuming a warm cache, a decent network connection,
etc.).

Right now, the way in which the user's input of a package spec is
translated into a package object in packages.parsePackages forces yum to
load all the data of the packages tables into memory, i.e. forces a
'select * from packages' for each enabled repo, and the ensuing creation
of a dict with seven entries for every package. That is slow for a
number of reasons.

I attach a patch that turns that around so that the database is queried
directly when the user runs 'yum install', avoiding sucking lots of data
into memory. If this looks like something that can be committed, I'll
look into doing similar things for the other uses of
packages.parsePackages.

I did some profiling without and with the patch on a fairly beefy FC6
machine with core, extras and updates enabled (a total of 8700 packages).
Profiling times are based on a warm cache, local repo mirrors and are
the average of a handful of runs. I used two simple tests:
      * Install of a nonexistent package; this is one use that should be
        fast because it's frustrating to have yum take a long time to
        tell the user about a typo. Initially, running 'yum -d 0 -e 0
        install foobar' took 2.9s (user + system), with 1.3s spent in
        loading the package metadata and 0.5s in packages.parsePackages
        (the patch eliminates the metadata loading step). With the patch
        applied, this now takes 0.8s, with 0.2s for parsing packages.
      * Install of a single package, another operation that users expect
        to be fast; I ran 'yum -d 0 -e 0 -y install xpdf' repeatedly for
        this. Initially, this took 4.3s with 1.3s for loading the
        package metadata, and 0.5s for packages.parsePackages. With the
        patch, this now takes 2.7s with 0.2s spent in parsePackages.

David

? .project
? build
? profile.txt
? profu.py
Index: cli.py
===================================================================
RCS file: /cvsroot/yum/cvs/yum/cli.py,v
retrieving revision 1.247
diff -u -r1.247 cli.py
--- cli.py	7 Dec 2006 23:25:56 -0000	1.247
+++ cli.py	15 Jan 2007 19:39:32 -0000
@@ -511,7 +511,6 @@
         
         self.doRepoSetup()
         self.doRpmDBSetup()
-        avail = self.pkgSack.returnPackages()
         toBeInstalled = {} # keyed on name
         passToUpdate = [] # list of pkgtups to pass along to updatecheck
 
@@ -524,8 +523,7 @@
                          # no matter what we don't go looking at repos
 
             arglist = [arg]
-            exactmatch, matched, unmatched = parsePackages(avail, arglist, 
-                                                               casematch=1)
+            exactmatch, matched, unmatched = self.pkgSack.parsePackages(arglist)
             if len(unmatched) > 0: # if we get back anything in unmatched, check it for a virtual-provide
                 arg = unmatched[0] #only one in there
                 self.verbose_logger.debug('Checking for virtual provide or file-provide for %s', 
@@ -538,7 +536,7 @@
                     arg = '%s:%s-%s-%s.%s' % (mypkg.epoch, mypkg.name,
                                               mypkg.version, mypkg.release,
                                               mypkg.arch)
-                    emtch, mtch, unmtch = parsePackages(avail, [arg])
+                    emtch, mtch, unmtch = self.pkgSack.parsePackages([arg])
                     exactmatch.extend(emtch)
                     matched.extend(mtch)
             
Index: yum/packageSack.py
===================================================================
RCS file: /cvsroot/yum/cvs/yum/yum/packageSack.py,v
retrieving revision 1.23
diff -u -r1.23 packageSack.py
--- yum/packageSack.py	19 Dec 2006 03:43:31 -0000	1.23
+++ yum/packageSack.py	15 Jan 2007 19:39:32 -0000
@@ -18,6 +18,7 @@
 import warnings
 import re
 import fnmatch
+import misc
 
 class PackageSackBase(object):
     """Base class that provides the interface for PackageSacks."""
@@ -317,6 +318,28 @@
     def searchAll(self, arg, query_type):
         return self._computeAggregateListResult("searchAll", arg, query_type)
 
+    def parsePackages(self, pkgspecs):
+        exactmatch = []
+        matched = []
+        unmatched = []
+        for sack in self.sacks.values():
+            if hasattr(sack, "parsePackages"):
+                e, m, u = None, None, None
+                try:
+                    e, m, u = sack.parsePackages(pkgspecs)
+                except PackageSackError:
+                    continue
+
+                if e:
+                    exactmatch.extend(e)
+                    matched.extend(m)
+                    unmatched.extend(u)
+
+        matched = misc.unique(matched)
+        unmatched = misc.unique(unmatched)
+        exactmatch = misc.unique(exactmatch)
+        return exactmatch, matched, unmatched
+
     def _computeAggregateListResult(self, methodName, *args):
         result = []
         for sack in self.sacks.values():
Index: yum/sqlitesack.py
===================================================================
RCS file: /cvsroot/yum/cvs/yum/yum/sqlitesack.py,v
retrieving revision 1.54
diff -u -r1.54 sqlitesack.py
--- yum/sqlitesack.py	15 Dec 2006 14:40:55 -0000	1.54
+++ yum/sqlitesack.py	15 Jan 2007 19:39:34 -0000
@@ -27,7 +27,7 @@
 import Errors
 import misc
 
-from sqlutils import executeSQL
+from sqlutils import executeSQL, globToLike
 
 # Simple subclass of YumAvailablePackage that can load 'simple headers' from
 # the database when they are requested
@@ -499,6 +499,31 @@
             raise Errors.PackageSackError, 'No Package Matching %s' % name
         return misc.newestInList(allpkg)
 
+    # Do what packages.parsePackages does, but query the DB directly
+    def parsePackages(self, pkgspecs):
+        exactmatch = []
+        matched = []
+        unmatched = []
+        for p in pkgspecs:
+            q = globToLike(p)
+            if q:
+                query = PARSE_QUERY % ({ "op": "like", "q": q })
+            else:
+                query = PARSE_QUERY % ({ "op": "=", "q": p })
+            
+            for (rep, db) in self.primarydb.items():
+                cur = db.cursor()
+                executeSQL(cur, query)
+                res = cur.fetchall()
+                if len(res) == 0:
+                    unmatched.append(p)
+                else:
+                    exactmatch.extend(map(lambda x: self.pc(rep,self.db2class(x,True)), res))
+        matched = misc.unique(matched)
+        unmatched = misc.unique(unmatched)
+        exactmatch = misc.unique(exactmatch)
+        return exactmatch, matched, unmatched
+
     def returnPackages(self, repoid=None):
         """Returns a list of packages, only containing nevra information """
         returnList = []
@@ -506,7 +531,7 @@
             if (repoid == None or repoid == repo.id):
                 cur = cache.cursor()
                 executeSQL(cur, "select pkgId,name,epoch,version,release,arch from packages")
-                for x in cur.fetchall():
+                for x in cur:
                     if (self.excludes[repo].has_key(x['pkgId'])):
                         continue
                     returnList.append(self.pc(repo,self.db2class(x,True)))
@@ -585,3 +610,18 @@
     string2ft = {'f':'file','d': 'dir','g': 'ghost'}
     return [string2ft[x] for x in filetypestring]
 
+
+# Query used by parsePackages
+# op is either '=' or 'like', q is the search term
+# Check against name, nameArch, nameVerRelArch, nameVer, nameVerRel,
+# envra, nevra
+PARSE_QUERY = """
+select pkgId, name, arch, epoch, version, release from packages
+where name %(op)s '%(q)s'
+   or name || '.' || arch %(op)s '%(q)s'
+   or name || '-' || version %(op)s '%(q)s'
+   or name || '-' || version || '-' || release %(op)s '%(q)s'
+   or name || '-' || version || '-' || release || '.' || arch %(op)s '%(q)s'
+   or epoch || ':' || name || '-' || version || '-' || release || '.' || arch %(op)s '%(q)s'
+   or name || '-' || epoch || ':' || version || '-' || release || '.' || arch %(op)s '%(q)s'
+"""
Index: yum/sqlutils.py
===================================================================
RCS file: /cvsroot/yum/cvs/yum/yum/sqlutils.py,v
retrieving revision 1.1
diff -u -r1.1 sqlutils.py
--- yum/sqlutils.py	5 Dec 2006 20:51:29 -0000	1.1
+++ yum/sqlutils.py	15 Jan 2007 19:39:34 -0000
@@ -150,5 +150,16 @@
 else:
     executeSQL = executeSQLPyFormat
 
+# Convert the string S from a glob expression containing '*' and '?'
+# to a search expression for a SQL like query containing '%' and '?'
+# Return None if S can be used with a straight equal comparison, i.e. does
+# not contain at least one '*' or '?'
+def globToLike(s):
+    if s.find('*') >= 0:
+        return s.replace('*', '%')
+    elif s.find('?') >= 0:
+        return s
+    else:
+        return None
 
 
_______________________________________________
Yum-devel mailing list
[email protected]
https://lists.dulug.duke.edu/mailman/listinfo/yum-devel

Reply via email to