Source code for pybs.utils.clonable_multiset

from copy import copy
from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
import heapq as _heapq
from collections import Mapping as _Mapping
from operator import itemgetter as _itemgetter, __add__ as _add

from clonable import Clonable


[docs]class ClonableMultiset(Clonable): __slots__ = ('_ms',)
[docs] def __init__(self, iterable=0, *args, **kwargs): """Make a new ClonableMultiset. If an argument is given, it is added to the new instance using ``inplace_multiset_sum``. The result is immutable. """ Clonable.__init__(self) Clonable.__setattr__(self, '_ms', dict()) self.inplace_multiset_sum(iterable, **kwargs) self.set_immutable()
def check(self): pass
[docs] def __copy__(self): """Return a shallow mutable copy of self.""" result = self.__class__(self) result._set_mutable() return result
[docs] def __nonzero__(self): """Return true if set is not empty. Used to determine boolean value.""" return bool(self._ms)
def __setitem__(self, key, value): self._require_mutable() if isinstance(value, int): if value > 0: self._ms[key] = value elif value == 0: if key in self: del self[key] # ELSE just leave it. Don't want it in the dict anyways... else: raise ValueError( 'ClonableMultiset cannot have negative values: ' + str(key) + ': ' + str(value)) else: raise TypeError( 'bad operand type for ' + 'ClonableMultiset.inplace_multiset_sum(). ' + 'Values must be of type int, not: ' + str(type(value))) def __getitem__(self, key): try: return self._ms[key] except KeyError: return 0 def __delitem__(self, elem): 'Like dict.__delitem__() ' + \ 'but does not raise KeyError for missing values.' self._require_mutable() if elem in self._ms: del self._ms[elem]
[docs] def inplace_multiset_sum(self, iterable=0, **kwds): """Updates :math:`A` to :math:`A \uplus B`.""" self._require_mutable() if iterable is not 0: self_get = self._ms.get if isinstance(iterable, ClonableMultiset): if self: for elem, count in iterable.iteritems(): self._ms[elem] = self_get(elem, 0) + count else: self._ms.update(iterable) # Fast path when counter is empty elif isinstance(iterable, _Mapping): if self: for elem, count in iterable.iteritems(): self[elem] = self_get(elem, 0) + count else: for elem, count in iterable.iteritems(): self[elem] = count else: for elem in iterable: self._ms[elem] = self_get(elem, 0) + 1 if kwds: self.inplace_multiset_sum(kwds)
[docs] def inplace_add(self, elem): """"Add `elem` to `self` with multiplicity 1. If elem is already in A its multiplicity is increased by one. """ self._require_mutable() self._ms[elem] = self._ms.get(elem, 0) + 1
[docs] def add(self, elem): """Same as above, except the result is returned as a new instance of ClonableMultiset.""" with self.clone() as result: result._ms[elem] = result._ms.get(elem, 0) + 1 return result
[docs] def inplace_multiset_difference(self, iterable=None, **kwds): """Update :math:`A` to :math:`A \setminus B`. Raises an exception if the multiplicity of an element in B is larger than the multiplicity of the same element in A. This is non-standard behavior for mathematical multisets, but a sound check when they are used for trees. """ self._require_mutable() if iterable is not None: self_get = self._ms.get if isinstance(iterable, ClonableMultiset): # TODO: Use multiset minus or whatever here? # or else merge with next elif. for elem, count in iterable.items(): self[elem] = self_get(elem, 0) - count elif isinstance(iterable, _Mapping): for elem, count in iterable.items(): self[elem] = self_get(elem, 0) - count else: for elem in iterable: oldcount = self_get(elem, 0) if oldcount > 1: self._ms[elem] = oldcount - 1 elif oldcount == 1: del self._ms[elem] else: raise ValueError( 'ClonableMultiset cannot have negative values: ' + str(elem) + ': -1') if kwds: self.inplace_multiset_difference(kwds)
[docs] def scalar_mul(self, n): r"""Returns :math:`n \bigotimes A` as a new instance. That is a multiset where the multiplicities are scaled by `n`. """ if isinstance(n, int): with self.clone() as result: for key in self.iterkeys(): result[key] *= n return result # return type(self)(dict(((key, n*value) # for (key, value) in self.iteritems()))) # TODO: This is a nasty workaround. else: return NotImplemented
[docs] def multiset_sum(self, other): # Old name: __add__ """Return :math:`A \uplus B` as a new instance.""" if isinstance(other, ClonableMultiset): with self.clone() as result: for elem, count in other.items(): result._ms[elem] = result._ms.get(elem, 0) + count return result else: return NotImplemented
[docs] def multiset_difference(self, other): # Old name: __sub__ """Return :math:`A \setminus B` as a new instance. .. note:: Does allow a multiplicity in B to be larger than the corresponding multiplicity in A, the multiplicity of the element in question is 0. """ # TODO: Choose the not-truncated? if isinstance(other, ClonableMultiset): with self.__class__().clone() as result: for elem, count in self.items(): newcount = count - other[elem] if newcount > 0: result._ms[elem] = newcount return result else: return NotImplemented
[docs] def __or__(self, other): """Return the multiset union :math:`A \cup B` as a new instance. This is the same syntax as set union in Python (``|``). """ # TODO: Correctly assosciated to "|". Rename? if isinstance(other, ClonableMultiset): with self.__class__().clone() as result: for elem, count in self.items(): other_count = other[elem] result._ms[elem] = \ other_count if count < other_count else count for elem, count in other.items(): if elem not in self: result._ms[elem] = count return result else: return NotImplemented
[docs] def __and__(self, other): """"Return the multiset intersection :math:`A \cap B` as a new instance. This is the same syntax as set intersection in Python (``&``).""" # TODO: Correctly associated to "&". Rename? if isinstance(other, ClonableMultiset): with self.__class__().clone() as result: for elem, count in self.items(): other_count = other[elem] newcount = count if count < other_count else other_count if newcount > 0: result._ms[elem] = newcount return result else: return NotImplemented
[docs] def cardinality(self): """Return sum of multiplicities.""" return reduce(_add, self._ms.values(), 0)
[docs] def no_uniques(self): """Number of different elements in the multiset.""" return len(self._ms)
[docs] def most_common(self, n=None): """Return the `n` most common elements. """ if n is None: return sorted(self.iteritems(), key=_itemgetter(1), reverse=True) return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
[docs] def elements(self): 'Iterator returning each element as many times as its multiplicity.' return _chain.from_iterable(_starmap(_repeat, self._ms.iteritems()))
def __eq__(self, other): if self is other: return True elif isinstance(other, ClonableMultiset): return self._ms == other._ms else: return NotImplemented def __ne__(self, other): if self is other: return False elif isinstance(other, ClonableMultiset): return self._ms != other._ms else: return NotImplemented def __setattr__(self, *args, **kwargs): raise AttributeError def __delattr__(self, *args, **kwargs): raise AttributeError def __iter__(self): return iter(self._ms) def iteritems(self): """Return iterator over (key,value)-pairs.""" return self._ms.iteritems() def iterkeys(self): """Return iterator over keys.""" return self._ms.iterkeys()
[docs] def keys(self): """Return list of keys (elements).""" return self._ms.keys()
[docs] def values(self): """Return list of values (multiplicities)""" return self._ms.values()
[docs] def items(self): """Return list of (key,value)-pairs.""" return self._ms.items()
[docs] def sub(self, elem): with self.clone() as result: if elem in result: count = result[elem] if count > 1: result._ms[elem] -= 1#fast_setitem(elem, count - 1) elif count == 1: del result[elem] else: raise ValueError return result
[docs] def __contains__(self, elem): """Return True if elem is in self.""" return elem in self._ms
def __bool__(self): return bool(self._ms)
[docs] def _hash_(self): """Return hash value for multiset. Only called if immutable. """ # It would have been simpler and maybe more obvious to # use hash(tuple(sorted(self._d.iteritems()))) from this discussion # so far, but this solution is O(n). I don't know what kind of # n we are going to run into, but sometimes it's hard to resist the # urge to optimize when it will gain improved algorithmic performance. result = 0 for pair in self._ms.iteritems(): result ^= hash(pair) return result
[docs] def __repr__(self): """Return a string representative of the ClonableMultiset-object.""" if not self: return '%s()' % self.__class__.__name__ items = ', '.join(map('%r: %r'.__mod__, self.most_common())) return '%s({%s})' % (self.__class__.__name__, items)