In poking around the yum code, I noticed that performance of 'yum
install foo' can be greatly sped up (assuming a warm cache, decent
network connection etc.)
Right now, the way in which the user's input of a package spec is
translated into a package object in packages.parsePackages forces yum to
load all the data of the packages tables into memory, i.e. forces a
'select * from packages' for each enabled repo, and the ensuing creation
of a dict with seven entries for every package. That is slow for a
number of reasons.
I attach a patch that turns that around so that the database is queried
directly when the user runs 'yum install', avoiding sucking lots of data
into memory. If this looks like something that can be committed, I'll
look into doing similar things for the other uses of
packages.parsePackages.
I did some profiling without and with the patch on a fairly beefy FC6
machine with core, extras and updates enabled (a total of 8700 packages).
Profiling times are based on a warm cache, local repo mirrors and are
the average of a handful of runs. I used two simple tests:
* Install of a nonexistent package; this is one use that should be
fast because it's frustrating to have yum take a long time to
tell the user about a typo. Initially, running 'yum -d 0 -e 0
install foobar' took 2.9s (user + system), with 1.3s spent in
loading the package metadata and 0.5s in packages.parsePackages
(the patch eliminates the metadata loading step). With the patch
applied, this now takes 0.8s, with 0.2s for parsing packages.
* Install of a single package, another operation that users expect
to be fast; I ran 'yum -d 0 -e 0 -y install xpdf' repeatedly for
this. Initially, this took 4.3s with 1.3s for loading the
package metadata, and 0.5s for packages.parsePackages. With the
patch, this now takes 2.7s with 0.2s spent in parsePackages
David
? .project
? build
? profile.txt
? profu.py
Index: cli.py
===================================================================
RCS file: /cvsroot/yum/cvs/yum/cli.py,v
retrieving revision 1.247
diff -u -r1.247 cli.py
--- cli.py 7 Dec 2006 23:25:56 -0000 1.247
+++ cli.py 15 Jan 2007 19:39:32 -0000
@@ -511,7 +511,6 @@
self.doRepoSetup()
self.doRpmDBSetup()
- avail = self.pkgSack.returnPackages()
toBeInstalled = {} # keyed on name
passToUpdate = [] # list of pkgtups to pass along to updatecheck
@@ -524,8 +523,7 @@
# no matter what we don't go looking at repos
arglist = [arg]
- exactmatch, matched, unmatched = parsePackages(avail, arglist,
- casematch=1)
+ exactmatch, matched, unmatched = self.pkgSack.parsePackages(arglist)
if len(unmatched) > 0: # if we get back anything in unmatched, check it for a virtual-provide
arg = unmatched[0] #only one in there
self.verbose_logger.debug('Checking for virtual provide or file-provide for %s',
@@ -538,7 +536,7 @@
arg = '%s:%s-%s-%s.%s' % (mypkg.epoch, mypkg.name,
mypkg.version, mypkg.release,
mypkg.arch)
- emtch, mtch, unmtch = parsePackages(avail, [arg])
+ emtch, mtch, unmtch = self.pkgSack.parsePackages([arg])
exactmatch.extend(emtch)
matched.extend(mtch)
Index: yum/packageSack.py
===================================================================
RCS file: /cvsroot/yum/cvs/yum/yum/packageSack.py,v
retrieving revision 1.23
diff -u -r1.23 packageSack.py
--- yum/packageSack.py 19 Dec 2006 03:43:31 -0000 1.23
+++ yum/packageSack.py 15 Jan 2007 19:39:32 -0000
@@ -18,6 +18,7 @@
import warnings
import re
import fnmatch
+import misc
class PackageSackBase(object):
"""Base class that provides the interface for PackageSacks."""
@@ -317,6 +318,28 @@
def searchAll(self, arg, query_type):
return self._computeAggregateListResult("searchAll", arg, query_type)
+    def parsePackages(self, pkgspecs):
+        """Resolve pkgspecs against every sack that implements parsePackages.
+
+        Sacks without a parsePackages method, or whose call raises
+        PackageSackError, are skipped.
+
+        Returns a tuple (exactmatch, matched, unmatched), each passed
+        through misc.unique.  exactmatch and matched are the combined
+        results of all queried sacks; unmatched holds only the specs that
+        every queried sack reported as unmatched (all of pkgspecs when no
+        sack could be queried).
+        """
+        exactmatch = []
+        matched = []
+        unmatched = None    # stays None until the first sack has answered
+        for sack in self.sacks.values():
+            if not hasattr(sack, "parsePackages"):
+                continue
+            try:
+                e, m, u = sack.parsePackages(pkgspecs)
+            except PackageSackError:
+                continue
+
+            exactmatch.extend(e)
+            matched.extend(m)
+            # A spec is only unmatched overall if no sack could resolve it,
+            # so intersect the per-sack lists instead of concatenating them
+            if unmatched is None:
+                unmatched = list(u)
+            else:
+                unmatched = [spec for spec in unmatched if spec in u]
+
+        if unmatched is None:
+            # Nothing was queried, hence nothing can have matched
+            unmatched = list(pkgspecs)
+
+        matched = misc.unique(matched)
+        unmatched = misc.unique(unmatched)
+        exactmatch = misc.unique(exactmatch)
+        return exactmatch, matched, unmatched
+
def _computeAggregateListResult(self, methodName, *args):
result = []
for sack in self.sacks.values():
Index: yum/sqlitesack.py
===================================================================
RCS file: /cvsroot/yum/cvs/yum/yum/sqlitesack.py,v
retrieving revision 1.54
diff -u -r1.54 sqlitesack.py
--- yum/sqlitesack.py 15 Dec 2006 14:40:55 -0000 1.54
+++ yum/sqlitesack.py 15 Jan 2007 19:39:34 -0000
@@ -27,7 +27,7 @@
import Errors
import misc
-from sqlutils import executeSQL
+from sqlutils import executeSQL, globToLike
# Simple subclass of YumAvailablePackage that can load 'simple headers' from
# the database when they are requested
@@ -499,6 +499,31 @@
raise Errors.PackageSackError, 'No Package Matching %s' % name
return misc.newestInList(allpkg)
+    # Do what packages.parsePackages does, but query the DB directly
+    def parsePackages(self, pkgspecs):
+        """Resolve user supplied package specs with one query per repo DB.
+
+        Each spec is run through PARSE_QUERY, using 'like' when globToLike
+        returns a pattern for it and '=' otherwise.
+
+        Returns a tuple (exactmatch, matched, unmatched), each passed
+        through misc.unique.  exactmatch holds a package object for every
+        row found; matched is never filled in here and is returned empty;
+        unmatched holds the specs for which no repo produced a package.
+        """
+        exactmatch = []
+        matched = []
+        unmatched = []
+        for p in pkgspecs:
+            q = globToLike(p)
+            if q:
+                op, term = "like", q
+            else:
+                op, term = "=", p
+            # PARSE_QUERY splices the term into a single-quoted SQL literal;
+            # double any quote so user input cannot terminate the literal
+            query = PARSE_QUERY % ({ "op": op, "q": term.replace("'", "''") })
+
+            found = False
+            for (rep, db) in self.primarydb.items():
+                cur = db.cursor()
+                executeSQL(cur, query)
+                for x in cur.fetchall():
+                    # Skip excluded packages, the same check returnPackages
+                    # makes (assumes excludes is keyed like primarydb)
+                    if self.excludes[rep].has_key(x['pkgId']):
+                        continue
+                    exactmatch.append(self.pc(rep, self.db2class(x, True)))
+                    found = True
+            # Decide once per spec, after all repos have been consulted;
+            # a miss in one repo does not make the spec unmatched
+            if not found:
+                unmatched.append(p)
+        matched = misc.unique(matched)
+        unmatched = misc.unique(unmatched)
+        exactmatch = misc.unique(exactmatch)
+        return exactmatch, matched, unmatched
+
def returnPackages(self, repoid=None):
"""Returns a list of packages, only containing nevra information """
returnList = []
@@ -506,7 +531,7 @@
if (repoid == None or repoid == repo.id):
cur = cache.cursor()
executeSQL(cur, "select pkgId,name,epoch,version,release,arch from packages")
- for x in cur.fetchall():
+ for x in cur:
if (self.excludes[repo].has_key(x['pkgId'])):
continue
returnList.append(self.pc(repo,self.db2class(x,True)))
@@ -585,3 +610,18 @@
string2ft = {'f':'file','d': 'dir','g': 'ghost'}
return [string2ft[x] for x in filetypestring]
+
+# Query used by parsePackages
+# op is either '=' or 'like', q is the search term
+# Both are filled in with Python %-formatting, so q ends up verbatim inside
+# a single-quoted SQL literal: whoever formats this string must make sure
+# q cannot terminate that literal (e.g. by doubling single quotes)
+# Check against name, nameArch, nameVerRelArch, nameVer, nameVerRel,
+# envra, nevra
+PARSE_QUERY = """
+select pkgId, name, arch, epoch, version, release from packages
+where name %(op)s '%(q)s'
+ or name || '.' || arch %(op)s '%(q)s'
+ or name || '-' || version %(op)s '%(q)s'
+ or name || '-' || version || '-' || release %(op)s '%(q)s'
+ or name || '-' || version || '-' || release || '.' || arch %(op)s '%(q)s'
+ or epoch || ':' || name || '-' || version || '-' || release || '.' || arch %(op)s '%(q)s'
+ or name || '-' || epoch || ':' || version || '-' || release || '.' || arch %(op)s '%(q)s'
+"""
Index: yum/sqlutils.py
===================================================================
RCS file: /cvsroot/yum/cvs/yum/yum/sqlutils.py,v
retrieving revision 1.1
diff -u -r1.1 sqlutils.py
--- yum/sqlutils.py 5 Dec 2006 20:51:29 -0000 1.1
+++ yum/sqlutils.py 15 Jan 2007 19:39:34 -0000
@@ -150,5 +150,16 @@
else:
executeSQL = executeSQLPyFormat
+# Convert the string S from a glob expression containing '*' and '?'
+# to a search expression for a SQL LIKE query, where '%' matches any run
+# of characters and '_' matches exactly one character.
+# Return None if S can be used with a straight equal comparison, i.e. does
+# not contain at least one '*' or '?'
+# Note: a literal '%' or '_' already present in S is passed through
+# unescaped and therefore also acts as a LIKE wildcard.
+def globToLike(s):
+    if '*' not in s and '?' not in s:
+        return None
+    return s.replace('*', '%').replace('?', '_')
_______________________________________________
Yum-devel mailing list
[email protected]
https://lists.dulug.duke.edu/mailman/listinfo/yum-devel