Hi!
Some optimizations for the SqliteSack:
Move most operations to be based on pkgKey instead of pkgId to gain some speed.
Generate only one package object per package to avoid duplicates.
Load only a minimal set of tags by default.
The patch is quite straightforward, although it changes a lot of lines and more
or less all SQL statements. I see a 15% speedup for most use cases and
about 10% memory savings.
I kept .excludes and ._excluded() although they are no longer needed within
the SqliteSack itself. I don't know whether they are part of the API...
Have fun
Florian
>From ec4319a5ffbf995d1c0fa3a197a7249d0eeb5e39 Mon Sep 17 00:00:00 2001
From: Florian Festi <[EMAIL PROTECTED]>
Date: Fri, 17 Aug 2007 16:16:34 +0200
Subject: [PATCH] Move most operation to be based on pkgKey instead of pkgId to gain some speed.
Generate only one package object per package to avoid duplicates.
Load only minimal tags per default.
---
yum/sqlitesack.py | 146 +++++++++++++++++++++++++++-------------------------
1 files changed, 76 insertions(+), 70 deletions(-)
diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index 0e301ae..51cbd01 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -201,15 +201,17 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
def __init__(self, packageClass):
# Just init as usual and create a dict to hold the databases
- yumRepo.YumPackageSack.__init__(self,packageClass)
+ yumRepo.YumPackageSack.__init__(self, packageClass)
self.primarydb = {}
self.filelistsdb = {}
self.otherdb = {}
self.excludes = {}
+ self._excludes = set() # of (repo, pkgKey)
self._search_cache = {
'provides' : { },
'requires' : { },
}
+ self._key2pkg = {}
def __len__(self):
for (rep,cache) in self.primarydb.items():
@@ -238,6 +240,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
if not self.excludes.has_key(obj.repo):
self.excludes[obj.repo] = {}
self.excludes[obj.repo][obj.pkgId] = 1
+ self._excludes.add( (obj.repo, obj.pkgKey) )
self.pkglist = None
@@ -247,6 +250,22 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
return True
return False
+
+ def _pkgKeyExcluded(self, repo, pkgKey):
+ return (repo, pkgKey) in self._excludes
+
+ def _pkgExcluded(self, po):
+ return (po.repo, po.pkgKey) in self._excludes
+
+ def _packageByKey(self, repo, pkgKey):
+ if not self._key2pkg.has_key(repo):
+ self._key2pkg[repo] = {}
+ if not self._key2pkg[repo].has_key(pkgKey):
+ cur = self.primarydb[repo].cursor()
+ executeSQL(cur, "select pkgKey, pkgId, name, epoch, version, release from packages where pkgKey = ?", (pkgKey,))
+ po = self.pc(repo, cur.fetchone())
+ self._key2pkg[repo][pkgKey] = po
+ return self._key2pkg[repo][pkgKey]
def addDict(self, repo, datatype, dataobj, callback=None):
if self.added.has_key(repo):
@@ -317,19 +336,22 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
for (rep,cache) in self.filelistsdb.items():
cur = cache.cursor()
+ if glob:
+ dirname_check = ""
+ else:
+ dirname = os.path.dirname(name)
+ dirname_check = "dirname = '%s' and " % dirname
+
# grab the entries that are a single file in the
# filenames section, use sqlites globbing if it is a glob
- executeSQL(cur, "select packages.pkgId as pkgId from filelist, \
- packages where packages.pkgKey = filelist.pkgKey and \
- length(filelist.filetypes) = 1 and \
- filelist.dirname || ? || filelist.filenames \
- %s ?" % querytype, ('/', name))
+ executeSQL(cur, "select pkgKey from filelist where \
+ %s length(filetypes) = 1 and \
+ dirname || ? || filenames \
+ %s ?" % (dirname_check, querytype), ('/', name))
for ob in cur:
- if self._excluded(rep, ob['pkgId']):
+ if self._pkgKeyExcluded(rep, ob['pkgKey']):
continue
- pkg = self.getPackageDetails(ob['pkgId'])
- po = self.pc(rep, pkg)
- pkgs.append(po)
+ pkgs.append(self._packageByKey(rep, ob['pkgKey']))
def filelist_globber(dirname, filenames):
files = filenames.split('/')
@@ -340,27 +362,17 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
matches = filter(lambda x: name==x, fns)
return len(matches)
- if glob:
- dirname_check = ""
- else:
- dirname = os.path.dirname(name)
- dirname_check = "filelist.dirname = '%s' and " % dirname
-
cache.create_function("filelist_globber", 2, filelist_globber)
# for all the ones where filenames is multiple files,
# make the files up whole and use python's globbing method
- executeSQL(cur, "select packages.pkgId as pkgId \
- from filelist, packages where \
- %s length(filelist.filetypes) > 1 \
- and filelist_globber(filelist.dirname,filelist.filenames) \
- and packages.pkgKey = filelist.pkgKey " % dirname_check)
+ executeSQL(cur, "select pkgKey from filelist where \
+ %s length(filetypes) > 1 \
+ and filelist_globber(dirname,filenames)" % dirname_check)
for ob in cur:
- if self._excluded(rep, ob['pkgId']):
+ if self._pkgKeyExcluded(rep, ob['pkgKey']):
continue
- pkg = self.getPackageDetails(ob['pkgId'])
- po = self.pc(rep, pkg)
- pkgs.append(po)
+ pkgs.append(self._packageByKey(rep, ob['pkgKey']))
pkgs = misc.unique(pkgs)
return pkgs
@@ -371,7 +383,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
if len(fields) < 1:
return result
- basestring="select DISTINCT pkgId from packages where %s like '%%%s%%' " % (fields[0], searchstring)
+ basestring="select DISTINCT pkgKey from packages where %s like '%%%s%%' " % (fields[0], searchstring)
for f in fields[1:]:
basestring = "%s or %s like '%%%s%%' " % (basestring, f, searchstring)
@@ -380,11 +392,9 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
cur = cache.cursor()
executeSQL(cur, basestring)
for ob in cur:
- if self._excluded(rep, ob['pkgId']):
+ if self._pkgKeyExcluded(rep, ob['pkgKey']):
continue
- pkg = self.getPackageDetails(ob['pkgId'])
- result.append((self.pc(rep,pkg)))
-
+ result.append(self._packageByKey(rep, ob['pkgKey']))
return result
def returnObsoletes(self):
@@ -392,7 +402,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
executeSQL(cur, "select packages.name as name,\
- packages.pkgId as pkgId,\
+ packages.pkgKey as pkgKey,\
packages.arch as arch, packages.epoch as epoch,\
packages.release as release, packages.version as version,\
obsoletes.name as oname, obsoletes.epoch as oepoch,\
@@ -402,7 +412,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
for ob in cur:
# If the package that is causing the obsoletes is excluded
# continue without processing the obsoletes
- if self._excluded(rep, ob['pkgId']):
+ if self._pkgKeyExcluded(rep, ob['pkgKey']):
continue
key = ( ob['name'],ob['arch'],
@@ -464,12 +474,9 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
if rpmUtils.miscutils.rangeCompare(req, val):
tmp.setdefault(x['pkgKey'], []).append(val)
for pkgKey, hits in tmp.iteritems():
- executeSQL(cur, "select * from packages where pkgKey=?",
- (pkgKey,))
- x = cur.fetchone()
- if self._excluded(rep,x['pkgId']):
+ if self._pkgKeyExcluded(rep, pkgKey):
continue
- result[self.pc(rep,x)] = hits
+ result[self._packageByKey(rep, pkgKey)] = hits
if prcotype != 'provides' or name[0] != '/':
self._search_cache[prcotype][req] = result
@@ -492,11 +499,11 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
# If it is a filename, search the primary.xml file info
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
- executeSQL(cur, "select DISTINCT packages.* from files,packages where files.name = ? and files.pkgKey = packages.pkgKey", (name,))
- for x in cur:
- if self._excluded(rep,x['pkgId']):
+ executeSQL(cur, "select DISTINCT pkgKey from files where name = ?", (name,))
+ for ob in cur:
+ if self._pkgKeyExcluded(rep, ob['pkgKey']):
continue
- result[self.pc(rep,x)] = [(name, None, None)]
+ result[self._packageByKey(rep, ob['pkgKey'])] = [(name, None, None)]
self._search_cache[prcotype][req] = result
return result
@@ -518,11 +525,11 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
results = []
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
- executeSQL(cur, "select DISTINCT packages.* from %s,packages where %s.name %s ? and %s.pkgKey=packages.pkgKey" % (prcotype,prcotype,querytype,prcotype), (name,))
- for x in cur:
- if self._excluded(rep, x['pkgId']):
+ executeSQL(cur, "select DISTINCT pkgKey from %s where name %s ?" % (prcotype,querytype), (name,))
+ for ob in cur:
+ if self._pkgKeyExcluded(rep, ob['pkgKey']):
continue
- results.append(self.pc(rep, x))
+ results.append(self._packageByKey(rep, ob['pkgKey']))
# If it's not a provides or a filename, we are done
if prcotype != "provides" or name[0] != '/':
@@ -532,11 +539,11 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
# If it is a filename, search the primary.xml file info
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
- executeSQL(cur, "select DISTINCT packages.* from files,packages where files.name %s ? and files.pkgKey = packages.pkgKey" % querytype, (name,))
- for x in cur:
- if self._excluded(rep,x['pkgId']):
+ executeSQL(cur, "select DISTINCT pkgKey from files where name %s ?" % querytype, (name,))
+ for ob in cur:
+ if self._pkgKeyExcluded(rep, ob['pkgKey']):
continue
- results.append(self.pc(rep,x))
+ results.append(self._packageByKey(rep, ob['pkgKey']))
matched = 0
globs = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
@@ -651,9 +658,9 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
simplelist = []
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
- executeSQL(cur, "select pkgId,name,epoch,version,release,arch from packages")
+ executeSQL(cur, "select pkgKey,name,epoch,version,release,arch from packages")
for pkg in cur:
- if self._excluded(rep, pkg['pkgId']):
+ if self._pkgKeyExcluded(rep, pkg['pkgKey']):
continue
simplelist.append((pkg['name'], pkg['arch'], pkg['epoch'], pkg['version'], pkg['release']))
@@ -673,11 +680,11 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
allpkg = []
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
- executeSQL(cur, "select pkgId,name,epoch,version,release,arch from packages where name=? and arch=?",naTup)
- for x in cur:
- if self._excluded(rep, x['pkgId']):
- continue
- allpkg.append(self.pc(rep,x))
+ executeSQL(cur, "select pkgKey from packages where name=? and arch=?",naTup)
+ for ob in cur:
+ if self._pkgKeyExcluded(rep, ob['pkgKey']):
+ continue
+ allpkg.append(self._packageByKey(rep, ob['pkgKey']))
# if we've got zilch then raise
if not allpkg:
@@ -694,11 +701,11 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
allpkg = []
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
- executeSQL(cur, "select pkgId,name,epoch,version,release,arch from packages where name=?", (name,))
- for x in cur:
- if self._excluded(rep, x['pkgId']):
- continue
- allpkg.append(self.pc(rep,x))
+ executeSQL(cur, "select pkgKey from packages where name=?", (name,))
+ for ob in cur:
+ if self._pkgKeyExcluded(rep, ob['pkgKey']):
+ continue
+ allpkg.append(self._packageByKey(rep, ob['pkgKey']))
# if we've got zilch then raise
if not allpkg:
@@ -723,11 +730,11 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
cur = db.cursor()
executeSQL(cur, query)
for pkg in cur:
- if self._excluded(rep, pkg['pkgId']):
+ if self._pkgKeyExcluded(rep, pkg['pkgKey']):
continue
if p in unmatched:
unmatched.remove(p)
- matchres.append(self.pc(rep, pkg))
+ matchres.append(self._packageByKey(rep, pkg['pkgKey']))
exactmatch = misc.unique(exactmatch)
matched = misc.unique(matched)
@@ -774,7 +781,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
return returnList
# make up our execute string
- q = "select * from packages WHERE"
+ q = "select pkgKey from packages WHERE"
for (col, var) in [('name', name), ('epoch', epoch), ('version', ver),
('arch', arch), ('release', rel)]:
if var:
@@ -786,12 +793,11 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
# Search all repositories
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
- #executeSQL(cur, "select * from packages WHERE name = %s AND epoch = %s AND version = %s AND release = %s AND arch = %s" , (name,epoch,ver,rel,arch))
executeSQL(cur, q)
- for x in cur:
- if self._excluded(rep, x['pkgId']):
+ for ob in cur:
+ if self._pkgKeyExcluded(rep, ob['pkgKey']):
continue
- returnList.append(self.pc(rep,x))
+ returnList.append(self._packageByKey(rep, ob['pkgKey']))
return returnList
def excludeArchs(self, archlist):
@@ -803,7 +809,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
for (rep, cache) in self.primarydb.items():
cur = cache.cursor()
- myq = "select pkgId from packages where arch not in %s" % arch_query
+ myq = "select pkgId, pkgKey from packages where arch not in %s" % arch_query
executeSQL(cur, myq)
for row in cur:
obj = self.pc(rep,row)
@@ -841,7 +847,7 @@ def decodefiletypelist(filetypestring):
# Check against name, nameArch, nameVerRelArch, nameVer, nameVerRel,
# envra, nevra
PARSE_QUERY = """
-select pkgId, name, arch, epoch, version, release from packages
+select pkgKey from packages
where name %(op)s '%(q)s'
or name || '.' || arch %(op)s '%(q)s'
or name || '-' || version %(op)s '%(q)s'
--
1.5.2.2
_______________________________________________
Yum-devel mailing list
[email protected]
https://lists.dulug.duke.edu/mailman/listinfo/yum-devel