On Wed, Jan 21, 2015 at 7:47 AM, Steven D'Aprano <steve+comp.lang.pyt...@pearwood.info> wrote: >> More irksome that for the second we've to preface with >> >> from collections import Counter >> >> And still more a PITA that a straightforward standard name like bag (or >> multiset) is called by such an ungoogleable misleading name as counter. > > It is not misleading. collections.Counter is a class for counting (i.e. a > counter), not a bag. It is merely *similar* to a bag, but the API is not > the same as the usual bag API because the motivating design is not the same > as for bags.
To expand on this, Counter does provide a few multiset operations like union and intersection. But there are a number of cases where it falls short or does not perform as expected. To name a few: * There are no subset or superset comparison operations provided. * len(counter) returns the number of keys, not the number of elements. * Unlike a multiset, Counters can have negative or zero counts; one curious consequence of this is that an "empty" Counter can have a non-zero length and evaluate as true in boolean contexts. A while back I fiddled around with creating a true MultiSet class using a Counter as the underlying implementation. Here's what I came up with: from collections import Counter from collections.abc import MutableSet, Set __all__ = ['MultiSet'] class MultiSet(MutableSet): """Multiset class containing hashable items.""" # Class invariants: # * self._counter is a Counter s/t all values are positive ints denoting # multiplicity. # * set(self) == self._counter.keys() def __init__(self, iterable_or_mapping=()): """Create a new, empty MultiSet. If passed an iterable argument, initialize the MultiSet with items from the iterable. If passed a mapping argument, initialize the MultiSet with the keys of the mapping, each repeated a number of times equal to the associated value. """ self._counter = +Counter(iterable_or_mapping) @classmethod def _from_counter(cls, counter): self = cls.__new__(cls) self._counter = counter return self def __contains__(self, item): return item in self._counter def __hash__(self): raise TypeError('unhashable type: %s' % type(self)) def __iter__(self): return self._counter.elements() def __len__(self): # TODO: consider caching the length. return sum(self._counter.values()) def __repr__(self): if self: return 'MultiSet(%r)' % list(self) return 'MultiSet()' def add(self, item): """Add an element to the MultiSet.""" self._counter[item] += 1 def clear(self): """Remove all elements from the MultiSet, leaving it empty.""" self._counter.clear() def count(self, item): """Return the number of occurrences of the element.""" return self._counter[item] def discard(self, item): """Remove an element from the MultiSet. If there are multiple occurrences, remove only one such occurrence. If the element is not a member, do nothing. """ if item not in self._counter: return if self._counter[item] == 1: del self._counter[item] else: self._counter[item] -= 1 def __or__(self, other): """Return the union of the MultiSet and another set.""" if not isinstance(other, Set): return NotImplemented return self._from_counter(self._counter | _get_counter(other)) __ror__ = __or__ def __ior__(self, other): """Perform the in-place union of the MultiSet and another set.""" if not isinstance(other, Set): return NotImplemented self._counter |= _get_counter(other) return self def __and__(self, other): """Return the intersection of the MultiSet and another set.""" if not isinstance(other, Set): return NotImplemented return self._from_counter(self._counter & _get_counter(other)) __rand__ = __and__ def __iand__(self, other): """Perform the in-place intersection of the MultiSet and another set.""" if not isinstance(other, Set): return NotImplemented self._counter &= _get_counter(other) return self def __sub__(self, other): """Return the difference of the MultiSet and another set.""" if not isinstance(other, Set): return NotImplemented return self._from_counter(self._counter - _get_counter(other)) def __rsub__(self, other): """Return the difference of another set and the MultiSet.""" if not isinstance(other, Set): return NotImplemented return self._from_counter(_get_counter(other) - self._counter) def __isub__(self, other): """Perform the in-place set difference of the MultiSet and another set.""" if not isinstance(other, Set): return NotImplemented self._counter -= _get_counter(other) return self def __xor__(self, other): """Return the symmetric difference of the MultiSet and another set.""" if not isinstance(other, Set): return NotImplemented # X ^ Y == (X - Y) | (Y - X) other_counter = _get_counter(other) counter = (self._counter - other_counter) | (other_counter - self._counter) return self._from_counter(counter) __rxor__ = __xor__ def __ixor__(self, other): "Perform the in-place symmetric difference of the MultiSet and another set." if not isinstance(other, Set): return NotImplemented # X ^ Y == (X - Y) | (Y - X) other_counter = _get_counter(other) other_diff = other_counter - self._counter self._counter -= other_counter self._counter |= other_diff return self def __eq__(self, other): """Return whether the MultiSet and the other set have the same contents.""" if not isinstance(other, Set): return NotImplemented return self._counter == _get_counter(other) def __ne__(self, other): """Return whether the MultiSet and the other set have different contents.""" if not isinstance(other, Set): return NotImplemented return self._counter != _get_counter(other) def __le__(self, other): """Return whether the MultiSet is a subset of the other set.""" if not isinstance(other, Set): return NotImplemented other_counter = _get_counter(other) return all(self._counter[x] <= other_counter[x] for x in self._counter) def __lt__(self, other): """Return whether the MultiSet is a proper subset of the other set.""" if not isinstance(other, Set): return NotImplemented return self.__le__(other) and not self.__eq__(other) def __ge__(self, other): """Return whether the MultiSet is a superset of the other set.""" if not isinstance(other, Set): return NotImplemented other_counter = _get_counter(other) return all(self._counter[x] >= other_counter[x] for x in other_counter) def __gt__(self, other): """Return whether the MultiSet is a proper superset of the other set.""" if not isinstance(other, Set): return NotImplemented return self.__ge__(other) and not self.__eq__(other) def _get_counter(maybe_multiset): if isinstance(maybe_multiset, MultiSet): return maybe_multiset._counter else: return Counter(maybe_multiset) -- https://mail.python.org/mailman/listinfo/python-list