diff options
Diffstat (limited to 'third_party/python/pyrsistent/pyrsistent/_pvector.py')
-rw-r--r-- | third_party/python/pyrsistent/pyrsistent/_pvector.py | 713 |
1 files changed, 713 insertions, 0 deletions
diff --git a/third_party/python/pyrsistent/pyrsistent/_pvector.py b/third_party/python/pyrsistent/pyrsistent/_pvector.py new file mode 100644 index 0000000000..82232782b7 --- /dev/null +++ b/third_party/python/pyrsistent/pyrsistent/_pvector.py @@ -0,0 +1,713 @@ +from abc import abstractmethod, ABCMeta +from ._compat import Sequence, Hashable +from numbers import Integral +import operator +import six +from pyrsistent._transformations import transform + + +def _bitcount(val): + return bin(val).count("1") + +BRANCH_FACTOR = 32 +BIT_MASK = BRANCH_FACTOR - 1 +SHIFT = _bitcount(BIT_MASK) + + +def compare_pvector(v, other, operator): + return operator(v.tolist(), other.tolist() if isinstance(other, PVector) else other) + + +def _index_or_slice(index, stop): + if stop is None: + return index + + return slice(index, stop) + + +class PythonPVector(object): + """ + Support structure for PVector that implements structural sharing for vectors using a trie. + """ + __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '__weakref__') + + def __new__(cls, count, shift, root, tail): + self = super(PythonPVector, cls).__new__(cls) + self._count = count + self._shift = shift + self._root = root + self._tail = tail + + # Derived attribute stored for performance + self._tail_offset = self._count - len(self._tail) + return self + + def __len__(self): + return self._count + + def __getitem__(self, index): + if isinstance(index, slice): + # There are more conditions than the below where it would be OK to + # return ourselves, implement those... + if index.start is None and index.stop is None and index.step is None: + return self + + # This is a bit nasty realizing the whole structure as a list before + # slicing it but it is the fastest way I've found to date, and it's easy :-) + return _EMPTY_PVECTOR.extend(self.tolist()[index]) + + if index < 0: + index += self._count + + return PythonPVector._node_for(self, index)[index & BIT_MASK] + + def __add__(self, other): + return self.extend(other) + + def __repr__(self): + return 'pvector({0})'.format(str(self.tolist())) + + def __str__(self): + return self.__repr__() + + def __iter__(self): + # This is kind of lazy and will produce some memory overhead but it is the fasted method + # by far of those tried since it uses the speed of the built in python list directly. + return iter(self.tolist()) + + def __ne__(self, other): + return not self.__eq__(other) + + def __eq__(self, other): + return self is other or (hasattr(other, '__len__') and self._count == len(other)) and compare_pvector(self, other, operator.eq) + + def __gt__(self, other): + return compare_pvector(self, other, operator.gt) + + def __lt__(self, other): + return compare_pvector(self, other, operator.lt) + + def __ge__(self, other): + return compare_pvector(self, other, operator.ge) + + def __le__(self, other): + return compare_pvector(self, other, operator.le) + + def __mul__(self, times): + if times <= 0 or self is _EMPTY_PVECTOR: + return _EMPTY_PVECTOR + + if times == 1: + return self + + return _EMPTY_PVECTOR.extend(times * self.tolist()) + + __rmul__ = __mul__ + + def _fill_list(self, node, shift, the_list): + if shift: + shift -= SHIFT + for n in node: + self._fill_list(n, shift, the_list) + else: + the_list.extend(node) + + def tolist(self): + """ + The fastest way to convert the vector into a python list. + """ + the_list = [] + self._fill_list(self._root, self._shift, the_list) + the_list.extend(self._tail) + return the_list + + def _totuple(self): + """ + Returns the content as a python tuple. + """ + return tuple(self.tolist()) + + def __hash__(self): + # Taking the easy way out again... + return hash(self._totuple()) + + def transform(self, *transformations): + return transform(self, transformations) + + def __reduce__(self): + # Pickling support + return pvector, (self.tolist(),) + + def mset(self, *args): + if len(args) % 2: + raise TypeError("mset expected an even number of arguments") + + evolver = self.evolver() + for i in range(0, len(args), 2): + evolver[args[i]] = args[i+1] + + return evolver.persistent() + + class Evolver(object): + __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '_dirty_nodes', + '_extra_tail', '_cached_leafs', '_orig_pvector') + + def __init__(self, v): + self._reset(v) + + def __getitem__(self, index): + if not isinstance(index, Integral): + raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__) + + if index < 0: + index += self._count + len(self._extra_tail) + + if self._count <= index < self._count + len(self._extra_tail): + return self._extra_tail[index - self._count] + + return PythonPVector._node_for(self, index)[index & BIT_MASK] + + def _reset(self, v): + self._count = v._count + self._shift = v._shift + self._root = v._root + self._tail = v._tail + self._tail_offset = v._tail_offset + self._dirty_nodes = {} + self._cached_leafs = {} + self._extra_tail = [] + self._orig_pvector = v + + def append(self, element): + self._extra_tail.append(element) + return self + + def extend(self, iterable): + self._extra_tail.extend(iterable) + return self + + def set(self, index, val): + self[index] = val + return self + + def __setitem__(self, index, val): + if not isinstance(index, Integral): + raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__) + + if index < 0: + index += self._count + len(self._extra_tail) + + if 0 <= index < self._count: + node = self._cached_leafs.get(index >> SHIFT) + if node: + node[index & BIT_MASK] = val + elif index >= self._tail_offset: + if id(self._tail) not in self._dirty_nodes: + self._tail = list(self._tail) + self._dirty_nodes[id(self._tail)] = True + self._cached_leafs[index >> SHIFT] = self._tail + self._tail[index & BIT_MASK] = val + else: + self._root = self._do_set(self._shift, self._root, index, val) + elif self._count <= index < self._count + len(self._extra_tail): + self._extra_tail[index - self._count] = val + elif index == self._count + len(self._extra_tail): + self._extra_tail.append(val) + else: + raise IndexError("Index out of range: %s" % (index,)) + + def _do_set(self, level, node, i, val): + if id(node) in self._dirty_nodes: + ret = node + else: + ret = list(node) + self._dirty_nodes[id(ret)] = True + + if level == 0: + ret[i & BIT_MASK] = val + self._cached_leafs[i >> SHIFT] = ret + else: + sub_index = (i >> level) & BIT_MASK # >>> + ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val) + + return ret + + def delete(self, index): + del self[index] + return self + + def __delitem__(self, key): + if self._orig_pvector: + # All structural sharing bets are off, base evolver on _extra_tail only + l = PythonPVector(self._count, self._shift, self._root, self._tail).tolist() + l.extend(self._extra_tail) + self._reset(_EMPTY_PVECTOR) + self._extra_tail = l + + del self._extra_tail[key] + + def persistent(self): + result = self._orig_pvector + if self.is_dirty(): + result = PythonPVector(self._count, self._shift, self._root, self._tail).extend(self._extra_tail) + self._reset(result) + + return result + + def __len__(self): + return self._count + len(self._extra_tail) + + def is_dirty(self): + return bool(self._dirty_nodes or self._extra_tail) + + def evolver(self): + return PythonPVector.Evolver(self) + + def set(self, i, val): + # This method could be implemented by a call to mset() but doing so would cause + # a ~5 X performance penalty on PyPy (considered the primary platform for this implementation + # of PVector) so we're keeping this implementation for now. + + if not isinstance(i, Integral): + raise TypeError("'%s' object cannot be interpreted as an index" % type(i).__name__) + + if i < 0: + i += self._count + + if 0 <= i < self._count: + if i >= self._tail_offset: + new_tail = list(self._tail) + new_tail[i & BIT_MASK] = val + return PythonPVector(self._count, self._shift, self._root, new_tail) + + return PythonPVector(self._count, self._shift, self._do_set(self._shift, self._root, i, val), self._tail) + + if i == self._count: + return self.append(val) + + raise IndexError("Index out of range: %s" % (i,)) + + def _do_set(self, level, node, i, val): + ret = list(node) + if level == 0: + ret[i & BIT_MASK] = val + else: + sub_index = (i >> level) & BIT_MASK # >>> + ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val) + + return ret + + @staticmethod + def _node_for(pvector_like, i): + if 0 <= i < pvector_like._count: + if i >= pvector_like._tail_offset: + return pvector_like._tail + + node = pvector_like._root + for level in range(pvector_like._shift, 0, -SHIFT): + node = node[(i >> level) & BIT_MASK] # >>> + + return node + + raise IndexError("Index out of range: %s" % (i,)) + + def _create_new_root(self): + new_shift = self._shift + + # Overflow root? + if (self._count >> SHIFT) > (1 << self._shift): # >>> + new_root = [self._root, self._new_path(self._shift, self._tail)] + new_shift += SHIFT + else: + new_root = self._push_tail(self._shift, self._root, self._tail) + + return new_root, new_shift + + def append(self, val): + if len(self._tail) < BRANCH_FACTOR: + new_tail = list(self._tail) + new_tail.append(val) + return PythonPVector(self._count + 1, self._shift, self._root, new_tail) + + # Full tail, push into tree + new_root, new_shift = self._create_new_root() + return PythonPVector(self._count + 1, new_shift, new_root, [val]) + + def _new_path(self, level, node): + if level == 0: + return node + + return [self._new_path(level - SHIFT, node)] + + def _mutating_insert_tail(self): + self._root, self._shift = self._create_new_root() + self._tail = [] + + def _mutating_fill_tail(self, offset, sequence): + max_delta_len = BRANCH_FACTOR - len(self._tail) + delta = sequence[offset:offset + max_delta_len] + self._tail.extend(delta) + delta_len = len(delta) + self._count += delta_len + return offset + delta_len + + def _mutating_extend(self, sequence): + offset = 0 + sequence_len = len(sequence) + while offset < sequence_len: + offset = self._mutating_fill_tail(offset, sequence) + if len(self._tail) == BRANCH_FACTOR: + self._mutating_insert_tail() + + self._tail_offset = self._count - len(self._tail) + + def extend(self, obj): + # Mutates the new vector directly for efficiency but that's only an + # implementation detail, once it is returned it should be considered immutable + l = obj.tolist() if isinstance(obj, PythonPVector) else list(obj) + if l: + new_vector = self.append(l[0]) + new_vector._mutating_extend(l[1:]) + return new_vector + + return self + + def _push_tail(self, level, parent, tail_node): + """ + if parent is leaf, insert node, + else does it map to an existing child? -> + node_to_insert = push node one more level + else alloc new path + + return node_to_insert placed in copy of parent + """ + ret = list(parent) + + if level == SHIFT: + ret.append(tail_node) + return ret + + sub_index = ((self._count - 1) >> level) & BIT_MASK # >>> + if len(parent) > sub_index: + ret[sub_index] = self._push_tail(level - SHIFT, parent[sub_index], tail_node) + return ret + + ret.append(self._new_path(level - SHIFT, tail_node)) + return ret + + def index(self, value, *args, **kwargs): + return self.tolist().index(value, *args, **kwargs) + + def count(self, value): + return self.tolist().count(value) + + def delete(self, index, stop=None): + l = self.tolist() + del l[_index_or_slice(index, stop)] + return _EMPTY_PVECTOR.extend(l) + + def remove(self, value): + l = self.tolist() + l.remove(value) + return _EMPTY_PVECTOR.extend(l) + +@six.add_metaclass(ABCMeta) +class PVector(object): + """ + Persistent vector implementation. Meant as a replacement for the cases where you would normally + use a Python list. + + Do not instantiate directly, instead use the factory functions :py:func:`v` and :py:func:`pvector` to + create an instance. + + Heavily influenced by the persistent vector available in Clojure. Initially this was more or + less just a port of the Java code for the Clojure vector. It has since been modified and to + some extent optimized for usage in Python. + + The vector is organized as a trie, any mutating method will return a new vector that contains the changes. No + updates are done to the original vector. Structural sharing between vectors are applied where possible to save + space and to avoid making complete copies. + + This structure corresponds most closely to the built in list type and is intended as a replacement. Where the + semantics are the same (more or less) the same function names have been used but for some cases it is not possible, + for example assignments. + + The PVector implements the Sequence protocol and is Hashable. + + Inserts are amortized O(1). Random access is log32(n) where n is the size of the vector. + + The following are examples of some common operations on persistent vectors: + + >>> p = v(1, 2, 3) + >>> p2 = p.append(4) + >>> p3 = p2.extend([5, 6, 7]) + >>> p + pvector([1, 2, 3]) + >>> p2 + pvector([1, 2, 3, 4]) + >>> p3 + pvector([1, 2, 3, 4, 5, 6, 7]) + >>> p3[5] + 6 + >>> p.set(1, 99) + pvector([1, 99, 3]) + >>> + """ + + @abstractmethod + def __len__(self): + """ + >>> len(v(1, 2, 3)) + 3 + """ + + @abstractmethod + def __getitem__(self, index): + """ + Get value at index. Full slicing support. + + >>> v1 = v(5, 6, 7, 8) + >>> v1[2] + 7 + >>> v1[1:3] + pvector([6, 7]) + """ + + @abstractmethod + def __add__(self, other): + """ + >>> v1 = v(1, 2) + >>> v2 = v(3, 4) + >>> v1 + v2 + pvector([1, 2, 3, 4]) + """ + + @abstractmethod + def __mul__(self, times): + """ + >>> v1 = v(1, 2) + >>> 3 * v1 + pvector([1, 2, 1, 2, 1, 2]) + """ + + @abstractmethod + def __hash__(self): + """ + >>> v1 = v(1, 2, 3) + >>> v2 = v(1, 2, 3) + >>> hash(v1) == hash(v2) + True + """ + + @abstractmethod + def evolver(self): + """ + Create a new evolver for this pvector. The evolver acts as a mutable view of the vector + with "transaction like" semantics. No part of the underlying vector i updated, it is still + fully immutable. Furthermore multiple evolvers created from the same pvector do not + interfere with each other. + + You may want to use an evolver instead of working directly with the pvector in the + following cases: + + * Multiple updates are done to the same vector and the intermediate results are of no + interest. In this case using an evolver may be a more efficient and easier to work with. + * You need to pass a vector into a legacy function or a function that you have no control + over which performs in place mutations of lists. In this case pass an evolver instance + instead and then create a new pvector from the evolver once the function returns. + + The following example illustrates a typical workflow when working with evolvers. It also + displays most of the API (which i kept small by design, you should not be tempted to + use evolvers in excess ;-)). + + Create the evolver and perform various mutating updates to it: + + >>> v1 = v(1, 2, 3, 4, 5) + >>> e = v1.evolver() + >>> e[1] = 22 + >>> _ = e.append(6) + >>> _ = e.extend([7, 8, 9]) + >>> e[8] += 1 + >>> len(e) + 9 + + The underlying pvector remains the same: + + >>> v1 + pvector([1, 2, 3, 4, 5]) + + The changes are kept in the evolver. An updated pvector can be created using the + persistent() function on the evolver. + + >>> v2 = e.persistent() + >>> v2 + pvector([1, 22, 3, 4, 5, 6, 7, 8, 10]) + + The new pvector will share data with the original pvector in the same way that would have + been done if only using operations on the pvector. + """ + + @abstractmethod + def mset(self, *args): + """ + Return a new vector with elements in specified positions replaced by values (multi set). + + Elements on even positions in the argument list are interpreted as indexes while + elements on odd positions are considered values. + + >>> v1 = v(1, 2, 3) + >>> v1.mset(0, 11, 2, 33) + pvector([11, 2, 33]) + """ + + @abstractmethod + def set(self, i, val): + """ + Return a new vector with element at position i replaced with val. The original vector remains unchanged. + + Setting a value one step beyond the end of the vector is equal to appending. Setting beyond that will + result in an IndexError. + + >>> v1 = v(1, 2, 3) + >>> v1.set(1, 4) + pvector([1, 4, 3]) + >>> v1.set(3, 4) + pvector([1, 2, 3, 4]) + >>> v1.set(-1, 4) + pvector([1, 2, 4]) + """ + + @abstractmethod + def append(self, val): + """ + Return a new vector with val appended. + + >>> v1 = v(1, 2) + >>> v1.append(3) + pvector([1, 2, 3]) + """ + + @abstractmethod + def extend(self, obj): + """ + Return a new vector with all values in obj appended to it. Obj may be another + PVector or any other Iterable. + + >>> v1 = v(1, 2, 3) + >>> v1.extend([4, 5]) + pvector([1, 2, 3, 4, 5]) + """ + + @abstractmethod + def index(self, value, *args, **kwargs): + """ + Return first index of value. Additional indexes may be supplied to limit the search to a + sub range of the vector. + + >>> v1 = v(1, 2, 3, 4, 3) + >>> v1.index(3) + 2 + >>> v1.index(3, 3, 5) + 4 + """ + + @abstractmethod + def count(self, value): + """ + Return the number of times that value appears in the vector. + + >>> v1 = v(1, 4, 3, 4) + >>> v1.count(4) + 2 + """ + + @abstractmethod + def transform(self, *transformations): + """ + Transform arbitrarily complex combinations of PVectors and PMaps. A transformation + consists of two parts. One match expression that specifies which elements to transform + and one transformation function that performs the actual transformation. + + >>> from pyrsistent import freeze, ny + >>> news_paper = freeze({'articles': [{'author': 'Sara', 'content': 'A short article'}, + ... {'author': 'Steve', 'content': 'A slightly longer article'}], + ... 'weather': {'temperature': '11C', 'wind': '5m/s'}}) + >>> short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:25] + '...' if len(c) > 25 else c) + >>> very_short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:15] + '...' if len(c) > 15 else c) + >>> very_short_news.articles[0].content + 'A short article' + >>> very_short_news.articles[1].content + 'A slightly long...' + + When nothing has been transformed the original data structure is kept + + >>> short_news is news_paper + True + >>> very_short_news is news_paper + False + >>> very_short_news.articles[0] is news_paper.articles[0] + True + """ + + @abstractmethod + def delete(self, index, stop=None): + """ + Delete a portion of the vector by index or range. + + >>> v1 = v(1, 2, 3, 4, 5) + >>> v1.delete(1) + pvector([1, 3, 4, 5]) + >>> v1.delete(1, 3) + pvector([1, 4, 5]) + """ + + @abstractmethod + def remove(self, value): + """ + Remove the first occurrence of a value from the vector. + + >>> v1 = v(1, 2, 3, 2, 1) + >>> v2 = v1.remove(1) + >>> v2 + pvector([2, 3, 2, 1]) + >>> v2.remove(1) + pvector([2, 3, 2]) + """ + + +_EMPTY_PVECTOR = PythonPVector(0, SHIFT, [], []) +PVector.register(PythonPVector) +Sequence.register(PVector) +Hashable.register(PVector) + +def python_pvector(iterable=()): + """ + Create a new persistent vector containing the elements in iterable. + + >>> v1 = pvector([1, 2, 3]) + >>> v1 + pvector([1, 2, 3]) + """ + return _EMPTY_PVECTOR.extend(iterable) + +try: + # Use the C extension as underlying trie implementation if it is available + import os + if os.environ.get('PYRSISTENT_NO_C_EXTENSION'): + pvector = python_pvector + else: + from pvectorc import pvector + PVector.register(type(pvector())) +except ImportError: + pvector = python_pvector + + +def v(*elements): + """ + Create a new persistent vector containing all parameters to this function. + + >>> v1 = v(1, 2, 3) + >>> v1 + pvector([1, 2, 3]) + """ + return pvector(elements) |