mirror of https://github.com/chriskiehl/Gooey.git
Chris
4 years ago
7 changed files with 254 additions and 44 deletions
1    gooey/__init__.py
0    gooey/gui/components/filtering/__init__.py
118  gooey/gui/components/filtering/prefix_filter.py
52   gooey/gui/components/widgets/dropdown_filterable.py
31   gooey/tests/test_filterable_dropdown.py
92   gooey/tests/test_filtering.py
4    gooey/util/functional.py
gooey/gui/components/filtering/prefix_filter.py
@@ -0,0 +1,118 @@
import re

import pygtrie as trie
from functools import reduce

__all__ = ('PrefixTokenizers', 'PrefixSearch')


class PrefixTokenizers:
    # This string here is just an arbitrary long string so that
    # re.split finds no matches and returns the entire phrase
    ENTIRE_PHRASE = '::gooey/tokenization/entire-phrase'
    # \s == any whitespace character
    WORDS = r'\s'

    @classmethod
    def REGEX(cls, expression):
        return expression


class SearchOptions:
    def __init__(self,
                 choice_tokenizer=PrefixTokenizers.ENTIRE_PHRASE,
                 input_tokenizer=PrefixTokenizers.ENTIRE_PHRASE,
                 ignore_case=True,
                 operator='AND',
                 index_suffix=False,
                 **kwargs):
        self.choice_tokenizer = choice_tokenizer
        self.input_tokenizer = input_tokenizer
        self.ignore_case = ignore_case
        self.operator = operator
        self.index_suffix = index_suffix


class PrefixSearch(object):
    """
    A trie backed index for quickly finding substrings
    in a list of options.
    """

    def __init__(self, choices, options={}, *args, **kwargs):
        self.choices = sorted(filter(None, choices))
        self.options: SearchOptions = SearchOptions(**options)
        self.searchtree = self.buildSearchTrie(choices)

    def updateChoices(self, choices):
        self.choices = sorted(filter(None, choices))
        # rebuild the index so the new choices are immediately searchable
        self.searchtree = self.buildSearchTrie(choices)

    def findMatches(self, token):
        if not token:
            return sorted(self.choices)
        tokens = self.tokenizeInput(token)
        matches = [set(flatten(self._vals(self.searchtree, prefix=t))) for t in tokens]
        op = intersection if self.options.operator == 'AND' else union
        return sorted(reduce(op, matches))

    def tokenizeInput(self, token):
        """
        Cleans and tokenizes the user's input.

        Empty characters and spaces are trimmed to prevent
        matching all paths in the index.
        """
        return list(filter(None, re.split(self.options.input_tokenizer, self.clean(token))))

    def tokenizeChoice(self, choice):
        """
        Splits the `choice` into a series of tokens based on
        the user's criteria.

        If suffix indexing is enabled, the individual tokens
        are further broken down and indexed by their suffix offsets. e.g.

            'Banana', 'anana', 'nana', 'ana'
        """
        choice_ = self.clean(choice)
        tokens = re.split(self.options.choice_tokenizer, choice_)
        if self.options.index_suffix:
            return [token[i:]
                    for token in tokens
                    for i in range(len(token) - 2)]
        else:
            return tokens

    def clean(self, text):
        return text.lower() if self.options.ignore_case else text

    def buildSearchTrie(self, choices):
        searchtrie = trie.Trie()
        for choice in choices:
            for token in self.tokenizeChoice(choice):
                if not searchtrie.has_key(token):
                    searchtrie[token] = []
                searchtrie[token].append(choice)
        return searchtrie

    def _vals(self, searchtrie, **kwargs):
        try:
            return searchtrie.values(**kwargs)
        except KeyError:
            return []


def intersection(a, b):
    return a.intersection(b)


def union(a, b):
    return a.union(b)


def flatten(xs):
    return [item for x in xs for item in x]
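
For context, here is a minimal usage sketch of the new PrefixSearch class (illustrative only, not part of the diff; it assumes pygtrie is installed, the import path shown in the changed-files list, and example place names that simply mirror the test fixtures below):

    from gooey.gui.components.filtering.prefix_filter import PrefixTokenizers, PrefixSearch

    # Index each choice word by word and also by word suffixes,
    # so a query can land in the middle of a word.
    search = PrefixSearch(
        ['Japan Kyoto', 'Japan Tokyo', 'Albania Tirana'],
        {'choice_tokenizer': PrefixTokenizers.WORDS,
         'input_tokenizer': PrefixTokenizers.WORDS,
         'index_suffix': True})

    print(search.findMatches('kyo'))     # ['Japan Kyoto', 'Japan Tokyo']
    print(search.findMatches('alb ti'))  # ['Albania Tirana'] (default AND across input tokens)
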
gooey/tests/test_filtering.py
@@ -0,0 +1,92 @@
import unittest

from gooey import PrefixTokenizers
from gui.components.filtering.prefix_filter import SearchOptions, PrefixSearch
from collections import namedtuple

TestData = namedtuple('TestData', [
    'options',
    'input_string',
    'expected_results',
])

Places = namedtuple('Places', [
    'kabul',
    'tirana',
    'kyoto',
    'tokyo'
])

class TestPrefixFilter(unittest.TestCase):

    def setUp(self):
        self.testdata = Places(
            'Afghanistan Kabul',
            'Albania Tirana',
            'Japan Kyoto',
            'Japan Tokyo'
        )

    def test_prefix_searching(self):
        p = self.testdata
        cases = [
            TestData({'ignore_case': True}, 'a', [p.kabul, p.tirana]),
            TestData({'ignore_case': True}, 'A', [p.kabul, p.tirana]),
            TestData({'ignore_case': False}, 'a', []),
            TestData({'ignore_case': False}, 'A', [p.kabul, p.tirana]),

            # when using the phrase tokenizer, the search input must
            # match starting from the beginning. So we find Afghanistan
            TestData({'choice_tokenizer': PrefixTokenizers.ENTIRE_PHRASE}, 'Afghan', [p.kabul]),
            # but we cannot look up Kyoto because the phrase begins with "Japan"
            TestData({'choice_tokenizer': PrefixTokenizers.ENTIRE_PHRASE}, 'Kyoto', []),
            # So if we start with "Japan K" it'll be returned
            TestData({'choice_tokenizer': PrefixTokenizers.ENTIRE_PHRASE}, 'Japan K', [p.kyoto]),

            # the word tokenizer splits on all whitespace and indexes
            # each choice once for each UNIQUE word,
            # so passing in 'a' will match "Af" and "Al" as usual
            TestData({'choice_tokenizer': PrefixTokenizers.WORDS}, 'a', [p.kabul, p.tirana]),
            # but now we can also find Kyoto without prefixing "Japan" as we'd
            # need to do with the phrase tokenizer
            TestData({'choice_tokenizer': PrefixTokenizers.WORDS}, 'kyo', [p.kyoto]),

            # if we tokenize the input, we perform one search against the index per token.
            # The default operator is AND, which means all the words in your search
            # input must match the choice for it to count as a hit.
            # In this example, we index the choices under ENTIRE_PHRASE, but set the input
            # tokenizer to WORDS. Our input 'Japan K' gets tokenized to ['Japan', 'K'].
            # There is no phrase which starts with both "Japan" and "K", so we get no
            # matches returned
            TestData({'choice_tokenizer': PrefixTokenizers.ENTIRE_PHRASE,
                      'input_tokenizer': PrefixTokenizers.WORDS}, 'Japan K', []),
            # tokenizing the choices by WORDS means we can now filter on both words
            TestData({'choice_tokenizer': PrefixTokenizers.WORDS,
                      'input_tokenizer': PrefixTokenizers.WORDS}, 'Jap K', [p.kyoto]),
            # the default AND behavior can be swapped to OR to facilitate matching across
            # different records in the index.
            TestData({'choice_tokenizer': PrefixTokenizers.WORDS,
                      'input_tokenizer': PrefixTokenizers.WORDS,
                      'operator': 'OR'}, 'Kyo Tok', [p.kyoto, p.tokyo]),

            # Turning on suffix indexing allows matching anywhere within a word.
            # Now 'kyo' will match both the beginning of 'Kyoto' and the substring in 'ToKYO'
            # (see the short suffix-tokenization sketch after this file).
            TestData({'choice_tokenizer': PrefixTokenizers.WORDS,
                      'input_tokenizer': PrefixTokenizers.WORDS,
                      'index_suffix': True}, 'kyo ', [p.kyoto, p.tokyo]),

            TestData({'choice_tokenizer': PrefixTokenizers.WORDS,
                      'input_tokenizer': PrefixTokenizers.WORDS,
                      'index_suffix': True}, 'j kyo ', [p.kyoto, p.tokyo]),
        ]

        for case in cases:
            with self.subTest(case):
                searcher = PrefixSearch(self.testdata, case.options)
                result = searcher.findMatches(case.input_string)
                self.assertEqual(result, case.expected_results)
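
As a rough standalone illustration of what suffix indexing stores (this snippet mirrors the list comprehension in tokenizeChoice and is not part of the diff):

    token = 'tokyo'
    suffixes = [token[i:] for i in range(len(token) - 2)]
    print(suffixes)  # ['tokyo', 'okyo', 'kyo'] -- a prefix query for 'kyo' therefore reaches 'Tokyo'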