OK, after some more testing I found a few additional tweaks that reduce both runtime and memory usage. I'd like to postpone the LRU cache work for now, but get the sqlitesack changes and the additional tweaks merged quickly.

They include:

 * SqliteSack
  * Use pkgKey instead of pkgId
  * Generate only one package object per package (Now fixed)
  * Load only minimal tags by default.
  * Create YumAvailablePackageSqlite._files and ._checksums on demand only
 * Package
  * Make Package.pkgtup a real tuple
  * Cache hash value

These changes affect all operations, but installs see larger percentage improvements because they perform more SQLite operations and only a few operations on the rpmdb.

F8 full install
258MB -> 181MB (30%)
71.5s -> 44.1s (38%)

1000pkg install
97.5MB -> 80MB (18%)
22.8s -> 15.4s (33%)


Any objections?

Florian
>From b64a1dda722137d6f6b87a362bb2276fb6a794d3 Mon Sep 17 00:00:00 2001
From: Florian Festi <[EMAIL PROTECTED]>
Date: Tue, 11 Dec 2007 17:31:14 +0100
Subject: [PATCH] Move most operation to be based on pkgKey instead of pkgId to gain some speed.

Generate only one package object per package to avoid duplicates.
Load only minimal tags per default.
---
 yum/sqlitesack.py |  154 ++++++++++++++++++++++++++++-------------------------
 1 files changed, 82 insertions(+), 72 deletions(-)

diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index 0c612fd..8d744f6 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -220,15 +220,17 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
 
     def __init__(self, packageClass):
         # Just init as usual and create a dict to hold the databases
-        yumRepo.YumPackageSack.__init__(self,packageClass)
+        yumRepo.YumPackageSack.__init__(self, packageClass)
         self.primarydb = {}
         self.filelistsdb = {}
         self.otherdb = {}
         self.excludes = {}
+        self._excludes = set() # of (repo, pkgKey)
         self._search_cache = {
             'provides' : { },
             'requires' : { },
             }
+        self._key2pkg = {}
 
     @catchSqliteException
     def __len__(self):
@@ -276,7 +278,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         if not self.excludes.has_key(obj.repo):
             self.excludes[obj.repo] = {}
         self.excludes[obj.repo][obj.pkgId] = 1
-        
+        self._excludes.add( (obj.repo, obj.pkgKey) )
 
     def _excluded(self, repo, pkgId):
         if self.excludes.has_key(repo):
@@ -284,6 +286,22 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
                 return True
                 
         return False
+
+    def _pkgKeyExcluded(self, repo, pkgKey):
+        return (repo, pkgKey) in self._excludes
+
+    def _pkgExcluded(self, po):
+        return (po.repo, po.pkgKey) in self._excludes
+
+    def _packageByKey(self, repo, pkgKey):
+        if not self._key2pkg.has_key(repo):
+            self._key2pkg[repo] = {}
+        if not self._key2pkg[repo].has_key(pkgKey):
+            cur = self.primarydb[repo].cursor()
+            executeSQL(cur, "select pkgKey, pkgId, name, epoch, version, release from packages where pkgKey = ?", (pkgKey,))
+            po = self.pc(repo, cur.fetchone())
+            self._key2pkg[repo][pkgKey] = po
+        return self._key2pkg[repo][pkgKey]
         
     def addDict(self, repo, datatype, dataobj, callback=None):
         if self.added.has_key(repo):
@@ -344,19 +362,22 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         for (rep,cache) in self.filelistsdb.items():
             cur = cache.cursor()
 
+            if glob:
+                dirname_check = ""
+            else:
+                dirname = os.path.dirname(name)
+                dirname_check = "dirname = '%s' and " % dirname
+
             # grab the entries that are a single file in the 
             # filenames section, use sqlites globbing if it is a glob
-            executeSQL(cur, "select packages.pkgId as pkgId from filelist, \
-                    packages where packages.pkgKey = filelist.pkgKey and \
-                    length(filelist.filetypes) = 1 and \
-                    filelist.dirname || ? || filelist.filenames \
-                    %s ?" % querytype, ('/', name))
+            executeSQL(cur, "select pkgKey from filelist where \
+                    %s length(filetypes) = 1 and \
+                    dirname || ? || filenames \
+                    %s ?" % (dirname_check, querytype), ('/', name))
             for ob in cur:
-                if self._excluded(rep, ob['pkgId']):
+                if self._pkgKeyExcluded(rep, ob['pkgKey']):
                     continue
-                pkg = self.getPackageDetails(ob['pkgId'])
-                po = self.pc(rep, pkg)
-                pkgs.append(po)
+                pkgs.append(self._packageByKey(rep, ob['pkgKey']))
 
             def filelist_globber(dirname, filenames):
                 files = filenames.split('/')
@@ -367,27 +388,17 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
                     matches = filter(lambda x: name==x, fns)
                 return len(matches)
 
-            if glob:
-                dirname_check = ""
-            else:
-                dirname = os.path.dirname(name)
-                dirname_check = "filelist.dirname = '%s' and " % dirname
-
             cache.create_function("filelist_globber", 2, filelist_globber)
             # for all the ones where filenames is multiple files, 
             # make the files up whole and use python's globbing method
-            executeSQL(cur, "select packages.pkgId as pkgId \
-                             from filelist, packages where \
-                             %s length(filelist.filetypes) > 1 \
-                             and filelist_globber(filelist.dirname,filelist.filenames) \
-                             and packages.pkgKey = filelist.pkgKey " % dirname_check)
+            executeSQL(cur, "select pkgKey from filelist where \
+                             %s length(filetypes) > 1 \
+                             and filelist_globber(dirname,filenames)" % dirname_check)
 
             for ob in cur:
-                if self._excluded(rep, ob['pkgId']):
+                if self._pkgKeyExcluded(rep, ob['pkgKey']):
                     continue
-                pkg = self.getPackageDetails(ob['pkgId'])
-                po = self.pc(rep, pkg)
-                pkgs.append(po)
+                pkgs.append(self._packageByKey(rep, ob['pkgKey']))
 
         pkgs = misc.unique(pkgs)
         return pkgs
@@ -399,7 +410,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         if len(fields) < 1:
             return result
         
-        basestring="select DISTINCT pkgId from packages where %s like '%%%s%%' " % (fields[0], searchstring)
+        basestring="select DISTINCT pkgKey from packages where %s like '%%%s%%' " % (fields[0], searchstring)
         
         for f in fields[1:]:
             basestring = "%s or %s like '%%%s%%' " % (basestring, f, searchstring)
@@ -408,11 +419,9 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             cur = cache.cursor()
             executeSQL(cur, basestring)
             for ob in cur:
-                if self._excluded(rep, ob['pkgId']):
+                if self._pkgKeyExcluded(rep, ob['pkgKey']):
                     continue
-                pkg = self.getPackageDetails(ob['pkgId'])
-                result.append((self.pc(rep,pkg)))
-         
+                result.append(self._packageByKey(rep, ob['pkgKey']))
         return result
         
     @catchSqliteException
@@ -424,7 +433,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         for (rep,cache) in self.primarydb.items():
             cur = cache.cursor()
             executeSQL(cur, "select packages.name as name,\
-                packages.pkgId as pkgId,\
+                packages.pkgKey as pkgKey,\
                 packages.arch as arch, packages.epoch as epoch,\
                 packages.release as release, packages.version as version,\
                 obsoletes.name as oname, obsoletes.epoch as oepoch,\
@@ -434,7 +443,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             for ob in cur:
                 # If the package that is causing the obsoletes is excluded
                 # continue without processing the obsoletes
-                if self._excluded(rep, ob['pkgId']):
+                if self._pkgKeyExcluded(rep, ob['pkgKey']):
                     continue
                     
                 key = ( ob['name'],ob['arch'],
@@ -499,12 +508,9 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
                 if rpmUtils.miscutils.rangeCompare(req, val):
                     tmp.setdefault(x['pkgKey'], []).append(val)
             for pkgKey, hits in tmp.iteritems():
-                executeSQL(cur, "select * from packages where pkgKey=?",
-                           (pkgKey,))
-                x = cur.fetchone()
-                if self._excluded(rep,x['pkgId']):
+                if self._pkgKeyExcluded(rep, pkgKey):
                     continue
-                result[self.pc(rep,x)] = hits
+                result[self._packageByKey(rep, pkgKey)] = hits
 
         if prcotype != 'provides' or name[0] != '/':
             self._search_cache[prcotype][req] = result
@@ -527,11 +533,11 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         # If it is a filename, search the primary.xml file info
         for (rep,cache) in self.primarydb.items():
             cur = cache.cursor()
-            executeSQL(cur, "select DISTINCT packages.* from files,packages where files.name = ? and files.pkgKey = packages.pkgKey", (name,))
-            for x in cur:
-                if self._excluded(rep,x['pkgId']):
+            executeSQL(cur, "select DISTINCT pkgKey from files where name = ?", (name,))
+            for ob in cur:
+                if self._pkgKeyExcluded(rep, ob['pkgKey']):
                     continue
-                result[self.pc(rep,x)] = [(name, None, None)]
+                result[self._packageByKey(rep, ob['pkgKey'])] = [(name, None, None)]
         self._search_cache[prcotype][req] = result
         return result
 
@@ -554,11 +560,11 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         results = []
         for (rep,cache) in self.primarydb.items():
             cur = cache.cursor()
-            executeSQL(cur, "select DISTINCT packages.* from %s,packages where %s.name %s ? and %s.pkgKey=packages.pkgKey" % (prcotype,prcotype,querytype,prcotype), (name,))
-            for x in cur:
-                if self._excluded(rep, x['pkgId']):
+            executeSQL(cur, "select DISTINCT pkgKey from %s where name %s ?" % (prcotype,querytype), (name,))
+            for ob in cur:
+                if self._pkgKeyExcluded(rep, ob['pkgKey']):
                     continue
-                results.append(self.pc(rep, x))
+                results.append(self._packageByKey(rep, ob['pkgKey']))
         
         # If it's not a provides or a filename, we are done
         if prcotype != "provides" or name[0] != '/':
@@ -568,11 +574,11 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         # If it is a filename, search the primary.xml file info
         for (rep,cache) in self.primarydb.items():
             cur = cache.cursor()
-            executeSQL(cur, "select DISTINCT packages.* from files,packages where files.name %s ? and files.pkgKey = packages.pkgKey" % querytype, (name,))
-            for x in cur:
-                if self._excluded(rep,x['pkgId']):
+            executeSQL(cur, "select DISTINCT pkgKey from files where name %s ?" % querytype, (name,))
+            for ob in cur:
+                if self._pkgKeyExcluded(rep, ob['pkgKey']):
                     continue
-                results.append(self.pc(rep,x))
+                results.append(self._packageByKey(rep, ob['pkgKey']))
         
         matched = 0
         globs = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
@@ -689,11 +695,11 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         allpkg = []
         for (rep,cache) in self.primarydb.items():
             cur = cache.cursor()
-            executeSQL(cur, "select pkgId,name,epoch,version,release,arch from packages where name=? and arch=?",naTup)
-            for x in cur:
-                if self._excluded(rep, x['pkgId']):
-                    continue                    
-                allpkg.append(self.pc(rep,x))
+            executeSQL(cur, "select pkgKey from packages where name=? and arch=?",naTup)
+            for ob in cur:
+                if self._pkgKeyExcluded(rep, ob['pkgKey']):
+                    continue
+                allpkg.append(self._packageByKey(rep, ob['pkgKey']))
         
         # if we've got zilch then raise
         if not allpkg:
@@ -711,11 +717,11 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         allpkg = []
         for (rep,cache) in self.primarydb.items():
             cur = cache.cursor()
-            executeSQL(cur, "select pkgId,name,epoch,version,release,arch from packages where name=?", (name,))
-            for x in cur:
-                if self._excluded(rep, x['pkgId']):
-                    continue                    
-                allpkg.append(self.pc(rep,x))
+            executeSQL(cur, "select pkgKey from packages where name=?", (name,))
+            for ob in cur:
+                if self._pkgKeyExcluded(rep, ob['pkgKey']):
+                    continue
+                allpkg.append(self._packageByKey(rep, ob['pkgKey']))
         
         # if we've got zilch then raise
         if not allpkg:
@@ -741,11 +747,11 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
                 cur = db.cursor()
                 executeSQL(cur, query)
                 for pkg in cur:
-                    if self._excluded(rep, pkg['pkgId']):
+                    if self._pkgKeyExcluded(rep, pkg['pkgKey']):
                         continue
                     if p in unmatched:
                         unmatched.remove(p)
-                    matchres.append(self.pc(rep, pkg))
+                    matchres.append(self._packageByKey(rep, pkg['pkgKey']))
 
         exactmatch = misc.unique(exactmatch)
         matched = misc.unique(matched)
@@ -762,9 +768,14 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             if (repoid == None or repoid == repo.id):
                 cur = cache.cursor()
                 
-                executeSQL(cur, "select pkgId,name,epoch,version,release,arch from packages")
+                executeSQL(cur, "select pkgId, pkgKey, name,epoch,version,release,arch from packages")
                 for x in cur:
-                    returnList.append(self.pc(repo,x))
+                    if self._key2pkg.get(repo, {}).has_key(x['pkgKey']):
+                        po = self._key2pkg[repo][x['pkgKey']]
+                    else:
+                        po = self.pc(repo,x)
+                        self._key2pkg.setdefault(repo, {})[po.pkgKey] = po
+                    returnList.append(po)
         self.pkgobjlist = returnList
                 
     def returnPackages(self, repoid=None):
@@ -776,7 +787,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             self._buildPkgObjList(repoid)
 
         for po in self.pkgobjlist:
-            if self._excluded(po.repo, po.pkgId):
+            if self._pkgExcluded(po):
                 continue
             returnList.append(po)
 
@@ -796,7 +807,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             return returnList
         
         # make up our execute string
-        q = "select * from packages WHERE"
+        q = "select pkgKey from packages WHERE"
         for (col, var) in [('name', name), ('epoch', epoch), ('version', ver),
                            ('arch', arch), ('release', rel)]:
             if var:
@@ -808,12 +819,11 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         # Search all repositories            
         for (rep,cache) in self.primarydb.items():
             cur = cache.cursor()
-            #executeSQL(cur, "select * from packages WHERE name = %s AND epoch = %s AND version = %s AND release = %s AND arch = %s" , (name,epoch,ver,rel,arch))
             executeSQL(cur, q)
-            for x in cur:
-                if self._excluded(rep, x['pkgId']):
+            for ob in cur:
+                if self._pkgKeyExcluded(rep, ob['pkgKey']):
                     continue
-                returnList.append(self.pc(rep,x))
+                returnList.append(self._packageByKey(rep, ob['pkgKey']))
         return returnList
     
     @catchSqliteException
@@ -826,7 +836,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
 
         for (rep, cache) in self.primarydb.items():
             cur = cache.cursor()
-            myq = "select pkgId from packages where arch not in %s" % arch_query
+            myq = "select pkgId, pkgKey from packages where arch not in %s" % arch_query
             executeSQL(cur, myq)
             for row in cur:
                 obj = self.pc(rep,row)
@@ -864,7 +874,7 @@ def decodefiletypelist(filetypestring):
 # Check against name, nameArch, nameVerRelArch, nameVer, nameVerRel,
 # envra, nevra
 PARSE_QUERY = """
-select pkgId, name, arch, epoch, version, release from packages
+select pkgKey from packages
 where name %(op)s '%(q)s'
    or name || '.' || arch %(op)s '%(q)s'
    or name || '-' || version %(op)s '%(q)s'
-- 
1.5.3.3

>From cfeeed01a5567496e33d128612db84c789f8e93f Mon Sep 17 00:00:00 2001
From: Florian Festi <[EMAIL PROTECTED]>
Date: Tue, 18 Dec 2007 10:17:33 +0100
Subject: [PATCH] Make Package.pkgtup a real tuple to save memory and runtime

---
 test/testbase.py  |    1 +
 yum/packages.py   |    8 +++-----
 yum/sqlitesack.py |    3 ++-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/test/testbase.py b/test/testbase.py
index ab2a35b..669ff7e 100644
--- a/test/testbase.py
+++ b/test/testbase.py
@@ -53,6 +53,7 @@ class FakePackage(packages.YumAvailablePackage):
         self.rel = release
         self.epoch = epoch
         self.arch = arch
+        self.pkgtup = (self.name, self.arch, self.epoch, self.version, self.release)
 
         self.prco['provides'].append((name, 'EQ', (epoch, version, release)))
 
diff --git a/yum/packages.py b/yum/packages.py
index 3726a92..77fb2ed 100644
--- a/yum/packages.py
+++ b/yum/packages.py
@@ -161,6 +161,7 @@ class PackageObject(object):
         self.release = None
         self.epoch = None
         self.arch = None
+        # self.pkgtup = (self.name, self.arch, self.epoch, self.version, self.release)
         self._checksums = [] # (type, checksum, id(0,1)
         
     def __str__(self):
@@ -182,11 +183,6 @@ class PackageObject(object):
                       Errors.YumFutureDeprecationWarning, stacklevel=2)
         return getattr(self, varname)
 
-    def _pkgtup(self):
-        return (self.name, self.arch, self.epoch, self.version, self.release)
-    
-    pkgtup = property(_pkgtup)
-
     def returnChecksums(self):
         return self._checksums
 
@@ -424,6 +420,7 @@ class YumAvailablePackage(PackageObject, RpmBase):
             self.importFromDict(pkgdict)
             self.ver = self.version
             self.rel = self.release
+        self.pkgtup = (self.name, self.arch, self.epoch, self.version, self.release)
 
     def exclude(self):
         """remove self from package sack"""
@@ -639,6 +636,7 @@ class YumHeaderPackage(YumAvailablePackage):
         self.release = self.hdr['release']
         self.ver = self.version
         self.rel = self.release
+        self.pkgtup = (self.name, self.arch, self.epoch, self.version, self.release)
         self.summary = self.hdr['summary'].replace('\n', '')
         self.description = self.hdr['description']
         self.pkgid = self.hdr[rpm.RPMTAG_SHA1HEADER]
diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index 8d744f6..a829bdb 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -63,7 +63,8 @@ class YumAvailablePackageSqlite(YumAvailablePackage, PackageObject, RpmBase):
         self.id = self.pkgId
         self.ver = self.version 
         self.rel = self.release 
-        
+        self.pkgtup = (self.name, self.arch, self.epoch, self.version, self.release)
+
         self._changelog = None
 
     files = property(fget=lambda self: self._loadFiles())
-- 
1.5.3.3

>From e821050a9435187b2314289d904f42a9f906d3c6 Mon Sep 17 00:00:00 2001
From: Florian Festi <[EMAIL PROTECTED]>
Date: Fri, 14 Dec 2007 11:01:28 +0100
Subject: [PATCH] cache hash value of Package objects

---
 yum/packages.py   |    7 +++++--
 yum/sqlitesack.py |    1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/yum/packages.py b/yum/packages.py
index 77fb2ed..3337a0f 100644
--- a/yum/packages.py
+++ b/yum/packages.py
@@ -208,6 +208,7 @@ class RpmBase(object):
         self.files['ghost'] = []
         self._changelog = [] # (ctime, cname, ctext)
         self.licenses = []
+        self.hash = None
 
     def __eq__(self, other):
         if not other: # check if other not is a package object. 
@@ -225,9 +226,11 @@ class RpmBase(object):
         return PackageEVR(self.epoch, self.version, self.release)
     
     def __hash__(self):
-        mystr = '%s - %s:%s-%s-%s.%s' % (self.repo.id, self.epoch, self.name,
+        if self.hash is None:
+            mystr = '%s - %s:%s-%s-%s.%s' % (self.repo.id, self.epoch, self.name,
                                          self.version, self.release, self.arch)
-        return hash(mystr)
+            self.hash = hash(mystr)
+        return self.hash
         
     def returnPrco(self, prcotype, printable=False):
         """return list of provides, requires, conflicts or obsoletes"""
diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index a829bdb..ab1d4c4 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -66,6 +66,7 @@ class YumAvailablePackageSqlite(YumAvailablePackage, PackageObject, RpmBase):
         self.pkgtup = (self.name, self.arch, self.epoch, self.version, self.release)
 
         self._changelog = None
+        self.hash = None
 
     files = property(fget=lambda self: self._loadFiles())
         
-- 
1.5.3.3

>From fdd9c78105174b7ec8a7fb45dfacf8a94b7e6412 Mon Sep 17 00:00:00 2001
From: Florian Festi <[EMAIL PROTECTED]>
Date: Tue, 18 Dec 2007 13:21:55 +0100
Subject: [PATCH] Create YumAvailablePackageSqlite._files and ._checksums on demand only

---
 yum/sqlitesack.py |    5 ++---
 1 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index ab1d4c4..3c0af05 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -48,12 +48,10 @@ def catchSqliteException(func):
 
 class YumAvailablePackageSqlite(YumAvailablePackage, PackageObject, RpmBase):
     def __init__(self, repo, db_obj):
-        self._checksums = []
         self.prco = { 'obsoletes': (),
                       'conflicts': (),
                       'requires': (),
                       'provides': () }
-        self._files = {}
         self.sack = repo.sack
         self.repoid = repo.id
         self.repo = repo
@@ -87,7 +85,8 @@ class YumAvailablePackageSqlite(YumAvailablePackage, PackageObject, RpmBase):
                 pass
 
         try:
-            self._checksums.append((db_obj['checksum_type'], db_obj['pkgId'], True))
+            check_sum = (db_obj['checksum_type'], db_obj['pkgId'], True)
+            self._checksums = [ check_sum ]
         except (IndexError, KeyError):
             pass
 
-- 
1.5.3.3

_______________________________________________
Yum-devel mailing list
[email protected]
https://lists.dulug.duke.edu/mailman/listinfo/yum-devel

Reply via email to