On Tue, 2008-01-22 at 00:10 -0500, seth vidal wrote:
> On Mon, 2008-01-21 at 16:24 -0500, seth vidal wrote:
> > Hi,
> >  The attached patch looks like it fixes it and makes the results output
> > MUCH faster when you search for common things like 'devel'.
> > 
> 
> After talking with Ignacio Vazquez and James A. on IRC for a bit, the
> following patch sprang forth. Applied against HEAD, it is faster than we
> were with the previous patch, and it lets you Ctrl-C. :)
> 

And one more revision of the patch. If I don't hear any complaints, I'll
check this in tomorrow after I wake up. :)


-sv

diff --git a/yum/__init__.py b/yum/__init__.py
index 8489562..efc3e8c 100644
--- a/yum/__init__.py
+++ b/yum/__init__.py
@@ -29,6 +29,8 @@ import glob
 import fnmatch
 import logging
 import logging.config
+import operator
+
 try:
     from iniparse.compat import ParsingError, ConfigParser
 except ImportError:
@@ -1349,77 +1351,64 @@ class YumBase(depsolve.Depsolve):
             else:
                 sql_fields.append(f)
 
-        scores = {}
-        my_sets = {}
         matched_values = {}
 
-        def __sortbyVal(x, y):
-            (k, v) = x
-            (k2, v2) = y
-            if v > v2:
-                return 1
-            if v < v2:
-                return -1
-            if v == v2:
-                return 0
-        
-        # go through each item in the criteria list
-        # figure out if it matches and what it matches
-        # tally up the scores for the pkgs
         # yield the results in order of most terms matched first
-        
+        sorted_lists = {}
+        tmpres = []
+        real_crit = []
         for s in criteria:
-            narrowed_list = []
-            my_sets[s] = []
-            if s.find('%') != -1:
-                continue
-            
-            for sack in self.pkgSack.sacks.values():
-                narrowed_list.extend(sack.searchPrimaryFields(sql_fields, s))
-        
-            for po in narrowed_list:
-                tmpvalues = []
+            if s.find('%') == -1:
+                real_crit.append(s)
+
+        for sack in self.pkgSack.sacks.values():
+            tmpres.extend(sack.searchPrimaryFieldsMultipleStrings(sql_fields, real_crit))
+
+        for (po, count) in tmpres:
+            # check the pkg for sanity
+            # pop it into the sorted lists
+            tmpvalues = []
+            if count not in sorted_lists: sorted_lists[count] = []
+            for s in real_crit:
                 for field in fields:
                     value = getattr(po, field)
                     if value and value.lower().find(s.lower()) != -1:
                         tmpvalues.append(value)
 
-                if len(tmpvalues) > 0:
-                    matched_values[po] = tmpvalues
-                    my_sets[s].append(po)
-                    
-            for po in self.rpmdb:
-                tmpvalues = []
+            if len(tmpvalues) > 0:
+                sorted_lists[count].append((po, tmpvalues))
+
+            
+        
+        for po in self.rpmdb:
+            tmpvalues = []
+            criteria_matched = 0
+            for s in real_crit:
+                matched_s = False
                 for field in fields:
                     value = getattr(po, field)
                     if value and value.lower().find(s.lower()) != -1:
+                        if not matched_s:
+                            criteria_matched += 1
+                            matched_s = True
+                        
                         tmpvalues.append(value)
 
-                if len(tmpvalues) > 0:
-                    matched_values[po] = tmpvalues
-                    my_sets[s].append(po)
-        
-        for pkg in matched_values:
-            if scores.has_key(pkg):
-                continue
-            count = 0
-            
-            for this_set in my_sets.itervalues():
-                if pkg in this_set:
-                    count += 1
-            
-            scores[pkg] = count
 
-        i = scores.items()
-        i.sort(__sortbyVal)
-        i.reverse()
+            if len(tmpvalues) > 0:
+                if criteria_matched not in sorted_lists: sorted_lists[criteria_matched] = []
+                sorted_lists[criteria_matched].append((po, tmpvalues))
+                
+
+        # close our rpmdb connection so we can ctrl-c, kthxbai                    
+        self.closeRpmDB()
         
-        for (pkg,count) in i:
-            if matched_values.has_key(pkg):
-                yield (pkg, matched_values[pkg])
-            else:
-                print pkg
-            
+        yielded = {}
+        for val in reversed(sorted(sorted_lists)):
+            for (po, matched) in sorted(sorted_lists[val], key=operator.itemgetter(0)):
+                if (po.name, po.arch) not in yielded:
+                    yield (po, matched)
+                    yielded[(po.name, po.arch)] = 1
 
 
     def searchPackages(self, fields, criteria, callback=None):
diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index d5e2b0a..6675d01 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -423,6 +423,50 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
                 if self._pkgKeyExcluded(rep, ob['pkgKey']):
                     continue
                 result.append(self._packageByKey(rep, ob['pkgKey']))
+        return result    
+
+    @catchSqliteException
+    def searchPrimaryFieldsMultipleStrings(self, fields, searchstrings):
+        """search arbitrary fields from the primarydb for a multiple strings
+           return packages, number of items it matched as a list of tuples"""
+           
+        result = [] # (pkg, num matches)
+        if len(fields) < 1:
+            return result
+        
+       
+        unionstring = "select pkgKey, SUM(cumul) AS total from ( "
+        endunionstring = ")GROUP BY pkgKey ORDER BY total DESC"
+                
+        #SELECT pkgkey, SUM(cumul) AS total FROM (SELECT pkgkey, 1 
+        #AS cumul FROM packages WHERE description LIKE '%foo%' UNION ... ) 
+        #GROUP BY pkgkey ORDER BY total DESC;
+        selects = []
+        
+        # select pkgKey, 1 AS cumul from packages where description 
+        # like '%devel%' or description like '%python%' or description like '%ssh%'
+#        for f in fields:
+#            basestring = "select pkgKey, 1 AS cumul from packages where %s like '%%%s%%' " % (f,searchstrings[0]) 
+#            for s in searchstrings[1:]:
+#                basestring = "%s or %s like '%%%s%%' " % (basestring, f, s)
+#            selects.append(basestring)
+            
+        for s in searchstrings:         
+            basestring="select pkgKey,1 AS cumul from packages where %s like '%%%s%%' " % (fields[0], s)
+            for f in fields[1:]:
+                basestring = "%s or %s like '%%%s%%' " % (basestring, f, s)
+            selects.append(basestring)
+        
+        totalstring = unionstring + " UNION ALL ".join(selects) + endunionstring
+#        print totalstring
+        
+        for (rep,cache) in self.primarydb.items():
+            cur = cache.cursor()
+            executeSQL(cur, totalstring)
+            for ob in cur:
+                if self._pkgKeyExcluded(rep, ob['pkgKey']):
+                    continue
+                result.append((self._packageByKey(rep, ob['pkgKey']), ob['total']))
         return result
         
     @catchSqliteException
diff --git a/yummain.py b/yummain.py
index e3eb396..13215d3 100755
--- a/yummain.py
+++ b/yummain.py
@@ -31,6 +31,9 @@ import cli
 
 def main(args):
     """This does all the real work"""
+    if not sys.stdout.isatty():
+        import codecs
+        sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
 
     def exUserCancel():
         logger.critical('\n\nExiting on user cancel')
_______________________________________________
Yum-devel mailing list
[email protected]
https://lists.dulug.duke.edu/mailman/listinfo/yum-devel

Reply via email to