On Mon, 2008-01-21 at 16:24 -0500, seth vidal wrote:
> Hi,
>  The attached patch looks like it fixes it and makes the results output
> MUCH faster when you search for common things like 'devel'.
> 

After talking with Ignacio Vazquez and James A. on IRC for a bit, the
following patch sprang forth. Applied against HEAD, it is faster than the
previous patch, and it lets you interrupt a search with Ctrl-C. :)

-sv

diff --git a/yum/__init__.py b/yum/__init__.py
index 8489562..421e15f 100644
--- a/yum/__init__.py
+++ b/yum/__init__.py
@@ -1349,77 +1349,63 @@ class YumBase(depsolve.Depsolve):
             else:
                 sql_fields.append(f)
 
-        scores = {}
-        my_sets = {}
         matched_values = {}
 
-        def __sortbyVal(x, y):
-            (k, v) = x
-            (k2, v2) = y
-            if v > v2:
-                return 1
-            if v < v2:
-                return -1
-            if v == v2:
-                return 0
-        
-        # go through each item in the criteria list
-        # figure out if it matches and what it matches
-        # tally up the scores for the pkgs
         # yield the results in order of most terms matched first
-        
-        for s in criteria:
-            narrowed_list = []
-            my_sets[s] = []
-            if s.find('%') != -1:
-                continue
-            
-            for sack in self.pkgSack.sacks.values():
-                narrowed_list.extend(sack.searchPrimaryFields(sql_fields, s))
-        
-            for po in narrowed_list:
-                tmpvalues = []
+        sorted_lists = {}
+        tmpres = []
+        for sack in self.pkgSack.sacks.values():
+            tmpres.extend(sack.searchPrimaryFieldsMultipleStrings(sql_fields, criteria))
+
+        for (po, count) in tmpres:
+            # check the pkg for sanity
+            # pop it into the sorted lists
+            tmpvalues = []
+            if count not in sorted_lists: sorted_lists[count] = []
+            for s in criteria:
+                if s.find('%') != -1:
+                    continue
+                    
+
                 for field in fields:
                     value = getattr(po, field)
                     if value and value.lower().find(s.lower()) != -1:
                         tmpvalues.append(value)
 
-                if len(tmpvalues) > 0:
-                    matched_values[po] = tmpvalues
-                    my_sets[s].append(po)
-                    
-            for po in self.rpmdb:
-                tmpvalues = []
+            if len(tmpvalues) > 0:
+                sorted_lists[count].append((po, tmpvalues))
+
+            
+        
+        for po in self.rpmdb:
+            tmpvalues = []
+            criteria_matched = 0
+            for s in criteria:
+                matched_s = False
                 for field in fields:
                     value = getattr(po, field)
                     if value and value.lower().find(s.lower()) != -1:
+                        if not matched_s:
+                            criteria_matched += 1
+                            matched_s = True
+                        
                         tmpvalues.append(value)
 
-                if len(tmpvalues) > 0:
-                    matched_values[po] = tmpvalues
-                    my_sets[s].append(po)
-        
-        for pkg in matched_values:
-            if scores.has_key(pkg):
-                continue
-            count = 0
-            
-            for this_set in my_sets.itervalues():
-                if pkg in this_set:
-                    count += 1
-            
-            scores[pkg] = count
 
-        i = scores.items()
-        i.sort(__sortbyVal)
-        i.reverse()
-        
-        for (pkg,count) in i:
-            if matched_values.has_key(pkg):
-                yield (pkg, matched_values[pkg])
-            else:
-                print pkg
-            
+            if len(tmpvalues) > 0:
+                if criteria_matched not in sorted_lists: sorted_lists[criteria_matched] = []
+                sorted_lists[criteria_matched].append((po, tmpvalues))
+                
+
+        # close our rpmdb connection so we can ctrl-c, kthxbai                    
+        self.closeRpmDB()
+
+        sortvalues = sorted_lists.keys()
+        sortvalues.sort()
+        sortvalues.reverse()
+        for val in sortvalues:
+            for (po, matched) in sorted_lists[val]:
+                yield (po, matched)
 
 
     def searchPackages(self, fields, criteria, callback=None):
diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index d5e2b0a..6675d01 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -423,6 +423,50 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
                 if self._pkgKeyExcluded(rep, ob['pkgKey']):
                     continue
                 result.append(self._packageByKey(rep, ob['pkgKey']))
+        return result    
+
+    @catchSqliteException
+    def searchPrimaryFieldsMultipleStrings(self, fields, searchstrings):
+        """search arbitrary fields from the primarydb for a multiple strings
+           return packages, number of items it matched as a list of tuples"""
+           
+        result = [] # (pkg, num matches)
+        if len(fields) < 1:
+            return result
+        
+       
+        unionstring = "select pkgKey, SUM(cumul) AS total from ( "
+        endunionstring = ")GROUP BY pkgKey ORDER BY total DESC"
+                
+        #SELECT pkgkey, SUM(cumul) AS total FROM (SELECT pkgkey, 1 
+        #AS cumul FROM packages WHERE description LIKE '%foo%' UNION ... ) 
+        #GROUP BY pkgkey ORDER BY total DESC;
+        selects = []
+        
+        # select pkgKey, 1 AS cumul from packages where description 
+        # like '%devel%' or description like '%python%' or description like '%ssh%'
+#        for f in fields:
+#            basestring = "select pkgKey, 1 AS cumul from packages where %s like '%%%s%%' " % (f,searchstrings[0]) 
+#            for s in searchstrings[1:]:
+#                basestring = "%s or %s like '%%%s%%' " % (basestring, f, s)
+#            selects.append(basestring)
+            
+        for s in searchstrings:         
+            basestring="select pkgKey,1 AS cumul from packages where %s like '%%%s%%' " % (fields[0], s)
+            for f in fields[1:]:
+                basestring = "%s or %s like '%%%s%%' " % (basestring, f, s)
+            selects.append(basestring)
+        
+        totalstring = unionstring + " UNION ALL ".join(selects) + endunionstring
+#        print totalstring
+        
+        for (rep,cache) in self.primarydb.items():
+            cur = cache.cursor()
+            executeSQL(cur, totalstring)
+            for ob in cur:
+                if self._pkgKeyExcluded(rep, ob['pkgKey']):
+                    continue
+                result.append((self._packageByKey(rep, ob['pkgKey']), ob['total']))
         return result
         
     @catchSqliteException
diff --git a/yummain.py b/yummain.py
index e3eb396..4c8cf0f 100755
--- a/yummain.py
+++ b/yummain.py
@@ -222,6 +222,10 @@ def print_stats(stats):
 
 if __name__ == "__main__":
     try:
+        if not sys.stdout.isatty():
+            import codecs, locale
+            sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
+                        
         errcode = main(sys.argv[1:])
         #errcode = cprof(main, sys.argv[1:])
         #errcode = hotshot(main, sys.argv[1:])
_______________________________________________
Yum-devel mailing list
[email protected]
https://lists.dulug.duke.edu/mailman/listinfo/yum-devel

Reply via email to