Switch to a different implementation of a sorted list.
- Raymond Hettinger's version is simpler and faster when loading large summaries, e.g. 38 secs instead of 48 secs previously.
This commit is contained in:
parent
787c304d06
commit
e84d7ac958
3 changed files with 342 additions and 6 deletions
|
|
@ -34,10 +34,10 @@ import time
|
||||||
import docopt
|
import docopt
|
||||||
import pygments.styles
|
import pygments.styles
|
||||||
import pyinotify
|
import pyinotify
|
||||||
import sortedcontainers
|
|
||||||
|
|
||||||
import eris
|
import eris
|
||||||
from eris import fill3
|
from eris import fill3
|
||||||
|
from eris import sorted_collection
|
||||||
from eris import terminal
|
from eris import terminal
|
||||||
from eris import termstr
|
from eris import termstr
|
||||||
from eris import tools
|
from eris import tools
|
||||||
|
|
@ -223,7 +223,7 @@ class Summary:
|
||||||
self.is_loaded = False
|
self.is_loaded = False
|
||||||
self.closest_placeholder_generator = None
|
self.closest_placeholder_generator = None
|
||||||
sort_func = directory_sort if self.is_directory_sort else type_sort
|
sort_func = directory_sort if self.is_directory_sort else type_sort
|
||||||
self._entries = sortedcontainers.SortedList([], key=sort_func)
|
self._entries = sorted_collection.SortedCollection([], key=sort_func)
|
||||||
|
|
||||||
def __getstate__(self):
|
def __getstate__(self):
|
||||||
state = self.__dict__.copy()
|
state = self.__dict__.copy()
|
||||||
|
|
@ -261,7 +261,7 @@ class Summary:
|
||||||
|
|
||||||
def sort_entries(self):
|
def sort_entries(self):
|
||||||
key_func = directory_sort if self.is_directory_sort else type_sort
|
key_func = directory_sort if self.is_directory_sort else type_sort
|
||||||
self._entries = sortedcontainers.SortedList(
|
self._entries = sorted_collection.SortedCollection(
|
||||||
self._entries, key=key_func)
|
self._entries, key=key_func)
|
||||||
self.closest_placeholder_generator = None
|
self.closest_placeholder_generator = None
|
||||||
|
|
||||||
|
|
@ -275,7 +275,7 @@ class Summary:
|
||||||
Entry.MAX_WIDTH = max(len(entry), Entry.MAX_WIDTH)
|
Entry.MAX_WIDTH = max(len(entry), Entry.MAX_WIDTH)
|
||||||
self._max_path_length = max(len(entry.path) - len("./"),
|
self._max_path_length = max(len(entry.path) - len("./"),
|
||||||
self._max_path_length)
|
self._max_path_length)
|
||||||
self._entries.add(entry)
|
self._entries.insert(entry)
|
||||||
entry_index = self._entries.index(entry)
|
entry_index = self._entries.index(entry)
|
||||||
x, y = self._cursor_position
|
x, y = self._cursor_position
|
||||||
if entry_index <= y:
|
if entry_index <= y:
|
||||||
|
|
@ -310,7 +310,8 @@ class Summary:
|
||||||
self.result_total -= 1
|
self.result_total -= 1
|
||||||
result.delete()
|
result.delete()
|
||||||
row = self._entries[index]
|
row = self._entries[index]
|
||||||
self._entries.pop(index)
|
del self._entries._keys[index]
|
||||||
|
del self._entries._items[index]
|
||||||
if len(row) == Entry.MAX_WIDTH:
|
if len(row) == Entry.MAX_WIDTH:
|
||||||
Entry.MAX_WIDTH = max((len(entry) for entry in self._entries),
|
Entry.MAX_WIDTH = max((len(entry) for entry in self._entries),
|
||||||
default=0)
|
default=0)
|
||||||
|
|
|
||||||
335
eris/sorted_collection.py
Normal file
335
eris/sorted_collection.py
Normal file
|
|
@ -0,0 +1,335 @@
|
||||||
|
|
||||||
|
# From https://code.activestate.com/recipes/577197-sortedcollection/
|
||||||
|
|
||||||
|
# Copyright (c) 2010 Raymond Hettinger
|
||||||
|
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
|
# in the Software without restriction, including without limitation the rights
|
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
# copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
# The above copyright notice and this permission notice shall be included in all
|
||||||
|
# copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
# SOFTWARE.
|
||||||
|
|
||||||
|
|
||||||
|
from bisect import bisect_left, bisect_right
|
||||||
|
|
||||||
|
class SortedCollection(object):
    '''Sequence sorted by a key function.

    SortedCollection() is much easier to work with than using bisect() directly.
    It supports key functions like those used in sorted(), min(), and max().
    The result of the key function call is saved so that keys can be searched
    efficiently.

    Instead of returning an insertion-point which can be hard to interpret, the
    five find-methods return a specific item in the sequence. They can scan for
    exact matches, the last item less-than-or-equal to a key, or the first item
    greater-than-or-equal to a key.

    Once found, an item's ordinal position can be located with the index() method.
    New items can be added with the insert() and insert_right() methods.
    Old items can be deleted with the remove() method.

    The usual sequence methods are provided to support indexing, slicing,
    length lookup, clearing, copying, forward and reverse iteration, contains
    checking, item counts, item removal, and a nice looking repr.

    Finding and indexing are O(log n) operations while iteration and insertion
    are O(n).  The initial sort is O(n log n).

    The key function is stored in the 'key' attribute for easy introspection or
    so that you can assign a new key function (triggering an automatic re-sort).

    Items whose keys compare equal keep the order in which they were supplied;
    the items themselves are never compared while sorting, so they do not need
    to be orderable.

    In short, the class was designed to handle all of the common use cases for
    bisect but with a simpler API and support for key functions.

    >>> from pprint import pprint
    >>> from operator import itemgetter

    >>> s = SortedCollection(key=itemgetter(2))
    >>> for record in [
    ...         ('roger', 'young', 30),
    ...         ('angela', 'jones', 28),
    ...         ('bill', 'smith', 22),
    ...         ('david', 'thomas', 32)]:
    ...     s.insert(record)

    >>> pprint(list(s))         # show records sorted by age
    [('bill', 'smith', 22),
     ('angela', 'jones', 28),
     ('roger', 'young', 30),
     ('david', 'thomas', 32)]

    >>> s.find_le(29)           # find oldest person aged 29 or younger
    ('angela', 'jones', 28)
    >>> s.find_lt(28)           # find oldest person under 28
    ('bill', 'smith', 22)
    >>> s.find_gt(28)           # find youngest person over 28
    ('roger', 'young', 30)

    >>> r = s.find_ge(32)       # find youngest person aged 32 or older
    >>> s.index(r)              # get the index of their record
    3
    >>> s[3]                    # fetch the record at that index
    ('david', 'thomas', 32)

    >>> s.key = itemgetter(0)   # now sort by first name
    >>> pprint(list(s))
    [('angela', 'jones', 28),
     ('bill', 'smith', 22),
     ('david', 'thomas', 32),
     ('roger', 'young', 30)]

    '''

    def __init__(self, iterable=(), key=None):
        '''Build the collection from iterable, ordered by key(item).

        key=None means "sort by the item itself".  The caller's original key
        argument is remembered separately in _given_key so that repr() and
        pickling report what was actually passed in.
        '''
        self._given_key = key
        key = (lambda x: x) if key is None else key
        items = list(iterable)
        keys = [key(item) for item in items]
        # Sort positions by key only.  Sorting (key, item) tuples would fall
        # back to comparing the items whenever two keys are equal, which
        # raises TypeError for unorderable items.
        order = sorted(range(len(items)), key=keys.__getitem__)
        self._keys = [keys[i] for i in order]
        self._items = [items[i] for i in order]
        self._key = key

    def _getkey(self):
        'Return the key function currently in effect.'
        return self._key

    def _setkey(self, key):
        'Install a new key function and re-sort; a no-op for the same function.'
        if key is not self._key:
            self.__init__(self._items, key=key)

    def _delkey(self):
        'Reset to the default ordering (sort by the item itself).'
        self._setkey(None)

    key = property(_getkey, _setkey, _delkey, 'key function')

    def clear(self):
        'Remove all items, keeping the key function.'
        # Pass the key exactly as the caller gave it (possibly None) so that
        # repr() and __reduce__() never see the internal identity lambda.
        self.__init__([], self._given_key)

    def copy(self):
        'Return a new collection with the same items and key function.'
        return self.__class__(self, self._given_key)

    def __len__(self):
        return len(self._items)

    def __getitem__(self, i):
        'Return the item at index i, or a plain list for a slice.'
        return self._items[i]

    def __iter__(self):
        return iter(self._items)

    def __reversed__(self):
        return reversed(self._items)

    def __repr__(self):
        return '%s(%r, key=%s)' % (
            self.__class__.__name__,
            self._items,
            getattr(self._given_key, '__name__', repr(self._given_key))
        )

    def __reduce__(self):
        'Support pickling by rebuilding from the items and the given key.'
        return self.__class__, (self._items, self._given_key)

    def __contains__(self, item):
        'Return True if item is present among the items sharing its key.'
        k = self._key(item)
        i = bisect_left(self._keys, k)
        j = bisect_right(self._keys, k)
        return item in self._items[i:j]

    def index(self, item):
        'Find the position of an item.  Raise ValueError if not found.'
        k = self._key(item)
        i = bisect_left(self._keys, k)
        j = bisect_right(self._keys, k)
        # Only the run of items with an equal key needs a linear scan.
        return self._items[i:j].index(item) + i

    def count(self, item):
        'Return number of occurrences of item'
        k = self._key(item)
        i = bisect_left(self._keys, k)
        j = bisect_right(self._keys, k)
        return self._items[i:j].count(item)

    def insert(self, item):
        'Insert a new item.  If equal keys are found, add to the left'
        k = self._key(item)
        i = bisect_left(self._keys, k)
        self._keys.insert(i, k)
        self._items.insert(i, item)

    def insert_right(self, item):
        'Insert a new item.  If equal keys are found, add to the right'
        k = self._key(item)
        i = bisect_right(self._keys, k)
        self._keys.insert(i, k)
        self._items.insert(i, item)

    def remove(self, item):
        'Remove first occurrence of item.  Raise ValueError if not found'
        i = self.index(item)
        del self._keys[i]
        del self._items[i]

    def find(self, k):
        'Return first item with a key == k.  Raise ValueError if not found.'
        i = bisect_left(self._keys, k)
        if i != len(self) and self._keys[i] == k:
            return self._items[i]
        raise ValueError('No item found with key equal to: %r' % (k,))

    def find_le(self, k):
        'Return last item with a key <= k.  Raise ValueError if not found.'
        i = bisect_right(self._keys, k)
        if i:
            return self._items[i-1]
        raise ValueError('No item found with key at or below: %r' % (k,))

    def find_lt(self, k):
        'Return last item with a key < k.  Raise ValueError if not found.'
        i = bisect_left(self._keys, k)
        if i:
            return self._items[i-1]
        raise ValueError('No item found with key below: %r' % (k,))

    def find_ge(self, k):
        'Return first item with a key >= k.  Raise ValueError if not found'
        i = bisect_left(self._keys, k)
        if i != len(self):
            return self._items[i]
        raise ValueError('No item found with key at or above: %r' % (k,))

    def find_gt(self, k):
        'Return first item with a key > k.  Raise ValueError if not found'
        i = bisect_right(self._keys, k)
        if i != len(self):
            return self._items[i]
        raise ValueError('No item found with key above: %r' % (k,))
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------- Simple demo and tests -------------------------
|
||||||
|
if __name__ == '__main__':
    # Self-test: compare SortedCollection against brute-force reference
    # implementations on small random inputs, then run a few hand-written
    # checks and the doctests.

    def ve2no(f, *args):
        'Call f(*args); report a ValueError as the sentinel -1.'
        try:
            outcome = f(*args)
        except ValueError:
            return -1
        return outcome

    def slow_index(seq, k):
        'Position of the first element equal to k, or -1 if there is none.'
        return next((pos for pos, elem in enumerate(seq) if elem == k), -1)

    def slow_find(seq, k):
        'First element equal to k, or -1 if there is none.'
        return next((elem for elem in seq if elem == k), -1)

    def slow_find_le(seq, k):
        'Last element less-than or equal to k, or -1 if there is none.'
        return next((elem for elem in reversed(seq) if elem <= k), -1)

    def slow_find_lt(seq, k):
        'Last element strictly less-than k, or -1 if there is none.'
        return next((elem for elem in reversed(seq) if elem < k), -1)

    def slow_find_ge(seq, k):
        'First element greater-than or equal to k, or -1 if there is none.'
        return next((elem for elem in seq if elem >= k), -1)

    def slow_find_gt(seq, k):
        'First element strictly greater-than k, or -1 if there is none.'
        return next((elem for elem in seq if elem > k), -1)

    from random import choice
    pool = [1.5, 2, 2.0, 3, 3.0, 3.5, 4, 4.0, 4.5]
    # Each fast lookup is paired with the slow reference it must agree with.
    lookups = [
        ('index', slow_index),
        ('find', slow_find),
        ('find_le', slow_find_le),
        ('find_lt', slow_find_lt),
        ('find_ge', slow_find_ge),
        ('find_gt', slow_find_gt),
    ]
    for _round in range(500):
        for n in range(6):
            s = [choice(pool) for _ in range(n)]
            sc = SortedCollection(s)
            s.sort()
            for probe in pool:
                for method_name, reference in lookups:
                    fast = ve2no(getattr(sc, method_name), probe)
                    assert repr(fast) == repr(reference(s, probe))
            for pos, item in enumerate(s):
                assert repr(item) == repr(sc[pos])      # test __getitem__
                assert item in sc                       # test __contains__ and __iter__
                assert s.count(item) == sc.count(item)  # test count()
            assert len(sc) == n                         # test __len__
            backwards_sc = [repr(x) for x in reversed(sc)]
            backwards_s = [repr(x) for x in reversed(s)]
            assert backwards_sc == backwards_s          # test __reversed__
            assert list(sc.copy()) == list(sc)          # test copy()
            sc.clear()                                  # test clear()
            assert len(sc) == 0

    words = 'The quick Brown Fox jumped'.split()
    sd = SortedCollection(words, key=str.lower)
    assert sd._keys == ['brown', 'fox', 'jumped', 'quick', 'the']
    assert sd._items == ['Brown', 'Fox', 'jumped', 'quick', 'The']
    assert sd._key == str.lower
    assert repr(sd) == "SortedCollection(['Brown', 'Fox', 'jumped', 'quick', 'The'], key=lower)"

    sd.key = str.upper
    assert sd._key == str.upper
    assert len(sd) == 5
    assert list(reversed(sd)) == ['The', 'quick', 'jumped', 'Fox', 'Brown']
    for word in sd:
        assert word in sd
    for pos, word in enumerate(sd):
        assert word == sd[pos]

    sd.insert('jUmPeD')
    sd.insert_right('QuIcK')
    assert sd._keys == ['BROWN', 'FOX', 'JUMPED', 'JUMPED', 'QUICK', 'QUICK', 'THE']
    assert sd._items == ['Brown', 'Fox', 'jUmPeD', 'jumped', 'quick', 'QuIcK', 'The']
    assert sd.find_le('JUMPED') == 'jumped', sd.find_le('JUMPED')
    assert sd.find_ge('JUMPED') == 'jUmPeD'
    assert sd.find_le('GOAT') == 'Fox'
    assert sd.find_ge('GOAT') == 'jUmPeD'
    assert sd.find('FOX') == 'Fox'
    assert sd[3] == 'jumped'
    assert sd[3:5] == ['jumped', 'quick']
    assert sd[-2] == 'QuIcK'
    assert sd[-4:-2] == ['jumped', 'quick']
    for pos, word in enumerate(sd):
        assert sd.index(word) == pos

    # Looking up an absent item must raise ValueError.
    missing_reported = False
    try:
        sd.index('xyzpdq')
    except ValueError:
        missing_reported = True
    assert missing_reported, 'Oops, failed to notify of missing value'

    sd.remove('jumped')
    assert list(sd) == ['Brown', 'Fox', 'jUmPeD', 'quick', 'QuIcK', 'The']

    import doctest
    from operator import itemgetter
    print(doctest.testmod())
|
||||||
|
|
@ -10,7 +10,7 @@ if [ $DIST_ID != "ubuntu" ]; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
echo "Installing the dependencies of the eris script…"
|
echo "Installing the dependencies of the eris script…"
|
||||||
sudo apt --yes install python3-pip python3.8 util-linux python3-sortedcontainers
|
sudo apt --yes install python3-pip python3.8 util-linux
|
||||||
python3.8 -m pip install pyinotify pygments docopt pillow toml
|
python3.8 -m pip install pyinotify pygments docopt pillow toml
|
||||||
echo
|
echo
|
||||||
echo "Installing all the tools eris may need…"
|
echo "Installing all the tools eris may need…"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue