The IndexStreamIterator class can be used together with the
pkg_desc_index_line_read function to read an index file incrementally
as a stream.

The MultiIterGroupBy class can be used to iterate over multiple
IndexStreamIterator instances at once, incrementally grouping results
for a particular package from multiple indices, while limiting the
amount of any given index that must be in memory at once.

Both of these classes are used by the IndexedPortdb class in the next
patch of this series.

X-Gentoo-Bug: 525718
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718
---
 pym/portage/cache/index/IndexStreamIterator.py | 27 +++++++++
 pym/portage/util/iterators/MultiIterGroupBy.py | 82 ++++++++++++++++++++++++++
 pym/portage/util/iterators/__init__.py         |  2 +
 3 files changed, 111 insertions(+)
 create mode 100644 pym/portage/cache/index/IndexStreamIterator.py
 create mode 100644 pym/portage/util/iterators/MultiIterGroupBy.py
 create mode 100644 pym/portage/util/iterators/__init__.py

diff --git a/pym/portage/cache/index/IndexStreamIterator.py b/pym/portage/cache/index/IndexStreamIterator.py
new file mode 100644
index 0000000..972aee1
--- /dev/null
+++ b/pym/portage/cache/index/IndexStreamIterator.py
@@ -0,0 +1,27 @@
+# Copyright 2014 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+class IndexStreamIterator(object):
+
+       def __init__(self, f, parser):
+
+               self.parser = parser
+               self._file = f
+
+       def close(self):
+
+               if self._file is not None:
+                       self._file.close()
+                       self._file = None
+
+       def __iter__(self):
+
+               try:
+
+                       for line in self._file:
+                               node = self.parser(line)
+                               if node is not None:
+                                       yield node
+
+               finally:
+                       self.close()
diff --git a/pym/portage/util/iterators/MultiIterGroupBy.py b/pym/portage/util/iterators/MultiIterGroupBy.py
new file mode 100644
index 0000000..d4e62ad
--- /dev/null
+++ b/pym/portage/util/iterators/MultiIterGroupBy.py
@@ -0,0 +1,82 @@
+# Copyright 2014 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+class MultiIterGroupBy(object):
+       """
+       This class functions similarly to the itertools.groupby function,
+       except that it takes multiple source iterators as input. The source
+       iterators must yield objects in sorted order. A group is yielded as
+       soon as the progress of all iterators reaches a state which
+       guarantees that there can not be any remaining (unseen) elements of
+       the group. This is useful for incremental display of grouped search
+       results.
+       """
+
+       def __init__(self, iterators, key = None):
+               self._iterators = iterators
+               self._key = key
+
+       def __iter__(self):
+
+               progress = []
+               iterators = self._iterators[:]
+               for index in iterators:
+                       progress.append(None)
+
+               key_map = {}
+               eof = []
+               key_getter = self._key
+               if key_getter is None:
+                       key_getter = lambda x: x
+               max_progress = None
+
+               while iterators:
+                       min_progress = None
+                       for i, index in enumerate(iterators):
+
+                               if max_progress is not None and \
+                                       max_progress == progress[i] and \
+                                       min_progress is not None and \
+                                       max_progress != min_progress:
+                                       # This one has the most progress,
+                                       # so allow the others to catch up.
+                                       continue
+
+                               for entry in index:
+                                       progress[i] = key_getter(entry)
+                                       key_group = key_map.get(key_getter(entry))
+                                       if key_group is None:
+                                               key_group = []
+                                               key_map[key_getter(entry)] = key_group
+
+                                       key_group.append(entry)
+
+                                       if min_progress is None or \
+                                               key_getter(entry) < min_progress:
+                                               min_progress = key_getter(entry)
+
+                                       if max_progress is None or \
+                                               key_getter(entry) >= max_progress:
+                                               max_progress = key_getter(entry)
+                                               # This one has the most progress,
+                                               # so allow the others to catch up.
+                                               break
+
+                               else:
+                                       eof.append(i)
+
+                       if eof:
+                               for i in reversed(eof):
+                                       del iterators[i]
+                                       del progress[i]
+                               del eof[:]
+
+                       yield_these = []
+                       for k in key_map:
+                               if k <= min_progress:
+                                       yield_these.append(k)
+
+                       if yield_these:
+                               yield_these.sort()
+                               for k in yield_these:
+                                       yield key_map.pop(k)
diff --git a/pym/portage/util/iterators/__init__.py b/pym/portage/util/iterators/__init__.py
new file mode 100644
index 0000000..7cd880e
--- /dev/null
+++ b/pym/portage/util/iterators/__init__.py
@@ -0,0 +1,2 @@
+# Copyright 2014 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
-- 
2.0.4


Reply via email to