On Mon, 2008-01-21 at 16:24 -0500, seth vidal wrote:
> Hi,
> The attached patch looks like it fixes it and makes the results output
> MUCH faster when you search for common things like 'devel'.
>
After talking a bit with Ignacio Vazquez and James A. on IRC, the
following patch sprang forth. Applied against HEAD, it is faster than the
previous patch was, and it lets you interrupt with Ctrl-C. :)
-sv
diff --git a/yum/__init__.py b/yum/__init__.py
index 8489562..421e15f 100644
--- a/yum/__init__.py
+++ b/yum/__init__.py
@@ -1349,77 +1349,63 @@ class YumBase(depsolve.Depsolve):
else:
sql_fields.append(f)
- scores = {}
- my_sets = {}
matched_values = {}
- def __sortbyVal(x, y):
- (k, v) = x
- (k2, v2) = y
- if v > v2:
- return 1
- if v < v2:
- return -1
- if v == v2:
- return 0
-
- # go through each item in the criteria list
- # figure out if it matches and what it matches
- # tally up the scores for the pkgs
# yield the results in order of most terms matched first
-
- for s in criteria:
- narrowed_list = []
- my_sets[s] = []
- if s.find('%') != -1:
- continue
-
- for sack in self.pkgSack.sacks.values():
- narrowed_list.extend(sack.searchPrimaryFields(sql_fields, s))
-
- for po in narrowed_list:
- tmpvalues = []
+ sorted_lists = {}
+ tmpres = []
+ for sack in self.pkgSack.sacks.values():
+ tmpres.extend(sack.searchPrimaryFieldsMultipleStrings(sql_fields, criteria))
+
+ for (po, count) in tmpres:
+ # check the pkg for sanity
+ # pop it into the sorted lists
+ tmpvalues = []
+ if count not in sorted_lists: sorted_lists[count] = []
+ for s in criteria:
+ if s.find('%') != -1:
+ continue
+
+
for field in fields:
value = getattr(po, field)
if value and value.lower().find(s.lower()) != -1:
tmpvalues.append(value)
- if len(tmpvalues) > 0:
- matched_values[po] = tmpvalues
- my_sets[s].append(po)
-
- for po in self.rpmdb:
- tmpvalues = []
+ if len(tmpvalues) > 0:
+ sorted_lists[count].append((po, tmpvalues))
+
+
+
+ for po in self.rpmdb:
+ tmpvalues = []
+ criteria_matched = 0
+ for s in criteria:
+ matched_s = False
for field in fields:
value = getattr(po, field)
if value and value.lower().find(s.lower()) != -1:
+ if not matched_s:
+ criteria_matched += 1
+ matched_s = True
+
tmpvalues.append(value)
- if len(tmpvalues) > 0:
- matched_values[po] = tmpvalues
- my_sets[s].append(po)
-
- for pkg in matched_values:
- if scores.has_key(pkg):
- continue
- count = 0
-
- for this_set in my_sets.itervalues():
- if pkg in this_set:
- count += 1
-
- scores[pkg] = count
- i = scores.items()
- i.sort(__sortbyVal)
- i.reverse()
-
- for (pkg,count) in i:
- if matched_values.has_key(pkg):
- yield (pkg, matched_values[pkg])
- else:
- print pkg
-
+ if len(tmpvalues) > 0:
+ if criteria_matched not in sorted_lists: sorted_lists[criteria_matched] = []
+ sorted_lists[criteria_matched].append((po, tmpvalues))
+
+
+ # close our rpmdb connection so we can ctrl-c, kthxbai
+ self.closeRpmDB()
+
+ sortvalues = sorted_lists.keys()
+ sortvalues.sort()
+ sortvalues.reverse()
+ for val in sortvalues:
+ for (po, matched) in sorted_lists[val]:
+ yield (po, matched)
def searchPackages(self, fields, criteria, callback=None):
diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index d5e2b0a..6675d01 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -423,6 +423,50 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
if self._pkgKeyExcluded(rep, ob['pkgKey']):
continue
result.append(self._packageByKey(rep, ob['pkgKey']))
+ return result
+
+ @catchSqliteException
+ def searchPrimaryFieldsMultipleStrings(self, fields, searchstrings):
+ """search arbitrary fields from the primarydb for a multiple strings
+ return packages, number of items it matched as a list of tuples"""
+
+ result = [] # (pkg, num matches)
+ if len(fields) < 1:
+ return result
+
+
+ unionstring = "select pkgKey, SUM(cumul) AS total from ( "
+ endunionstring = ")GROUP BY pkgKey ORDER BY total DESC"
+
+ #SELECT pkgkey, SUM(cumul) AS total FROM (SELECT pkgkey, 1
+ #AS cumul FROM packages WHERE description LIKE '%foo%' UNION ... )
+ #GROUP BY pkgkey ORDER BY total DESC;
+ selects = []
+
+ # select pkgKey, 1 AS cumul from packages where description
+ # like '%devel%' or description like '%python%' or description like '%ssh%'
+# for f in fields:
+# basestring = "select pkgKey, 1 AS cumul from packages where %s like '%%%s%%' " % (f,searchstrings[0])
+# for s in searchstrings[1:]:
+# basestring = "%s or %s like '%%%s%%' " % (basestring, f, s)
+# selects.append(basestring)
+
+ for s in searchstrings:
+ basestring="select pkgKey,1 AS cumul from packages where %s like '%%%s%%' " % (fields[0], s)
+ for f in fields[1:]:
+ basestring = "%s or %s like '%%%s%%' " % (basestring, f, s)
+ selects.append(basestring)
+
+ totalstring = unionstring + " UNION ALL ".join(selects) + endunionstring
+# print totalstring
+
+ for (rep,cache) in self.primarydb.items():
+ cur = cache.cursor()
+ executeSQL(cur, totalstring)
+ for ob in cur:
+ if self._pkgKeyExcluded(rep, ob['pkgKey']):
+ continue
+ result.append((self._packageByKey(rep, ob['pkgKey']), ob['total']))
return result
@catchSqliteException
diff --git a/yummain.py b/yummain.py
index e3eb396..4c8cf0f 100755
--- a/yummain.py
+++ b/yummain.py
@@ -222,6 +222,10 @@ def print_stats(stats):
if __name__ == "__main__":
try:
+ if not sys.stdout.isatty():
+ import codecs, locale
+ sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
+
errcode = main(sys.argv[1:])
#errcode = cprof(main, sys.argv[1:])
#errcode = hotshot(main, sys.argv[1:])
_______________________________________________
Yum-devel mailing list
[email protected]
https://lists.dulug.duke.edu/mailman/listinfo/yum-devel