Skip to content

Commit

Permalink
added longest_prefix() method and corresponding tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Dobatymo committed Oct 29, 2020
1 parent 239a9ae commit 3ce2808
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 2 deletions.
59 changes: 59 additions & 0 deletions src/dawg.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,24 @@ cdef class DAWG:

return res

def longest_prefix(self, unicode key):
    '''
    Return the longest key of this DAWG that is a prefix of ``key``.

    The dictionary is walked one byte at a time over the UTF-8 encoding
    of ``key``; the deepest position at which the dictionary reports a
    stored value wins.

    Raises ``KeyError`` if no key of this DAWG is a prefix of ``key``.
    '''
    cdef BaseType index = self.dct.root()
    # Walk the encoded bytes, not the unicode code points: a code point
    # squeezed into a single CharType cannot represent non-ASCII text.
    cdef bytes b_key = <bytes>key.encode('utf8')
    cdef int pos = 1       # length in bytes of the prefix consumed so far
    cdef int lastpos = 0   # byte length of the longest match; 0 means none
    cdef CharType ch

    for ch in b_key:
        if not self.dct.Follow(ch, &index):
            break
        if self.dct.has_value(index):
            lastpos = pos
        pos += 1

    if lastpos:
        # lastpos counts bytes, so slice the encoded key and decode it back.
        return b_key[:lastpos].decode('utf8')
    else:
        raise KeyError("No prefix found")

def iterprefixes(self, unicode key):
'''
Return a generator with keys of this DAWG that are prefixes of the ``key``.
Expand Down Expand Up @@ -802,7 +820,28 @@ cdef class BytesDAWG(CompletionDAWG):
"""
return self._similar_item_values(0, key, self.dct.root(), replaces)

def longest_prefix(self, unicode key):
    """
    Return ``(prefix, values)`` for the longest key of this DAWG that is
    a prefix of ``key``.

    ``prefix`` is the matched key as unicode and ``values`` is whatever
    ``_value_for_index`` yields for it (the accompanying tests show a
    list of payload bytes).

    Raises ``KeyError`` if no key of this DAWG is a prefix of ``key``.
    """
    cdef BaseType index = self.dct.root()
    cdef BaseType tmp
    cdef BaseType lastindex = index
    # Walk the encoded bytes, not the unicode code points: a code point
    # squeezed into a single CharType cannot represent non-ASCII text.
    cdef bytes b_key = <bytes>key.encode('utf8')
    cdef int pos = 1       # length in bytes of the prefix consumed so far
    cdef int lastpos = 0   # byte length of the longest match; 0 means none
    cdef CharType ch

    for ch in b_key:
        if not self.dct.Follow(ch, &index):
            break

        # Probe on a copy so the main walk is not disturbed: a key ends
        # here only if the payload separator can be followed from it.
        tmp = index
        if self.dct.Follow(self._c_payload_separator, &tmp):
            lastpos = pos
            lastindex = tmp
        pos += 1

    if lastpos:
        # lastpos counts bytes, so slice the encoded key and decode it back.
        return b_key[:lastpos].decode('utf8'), self._value_for_index(lastindex)
    else:
        raise KeyError("No prefix found")

cdef class RecordDAWG(BytesDAWG):
"""
Expand Down Expand Up @@ -904,6 +943,26 @@ cdef class IntDAWG(DAWG):
# Look up the raw bytes ``key`` via ``self.dct.Find`` and return the result as a C int.
# NOTE(review): what Find reports for a missing key is not visible here - confirm before relying on it.
cpdef int b_get_value(self, bytes key):
return self.dct.Find(key)

def longest_prefix(self, unicode key):
    '''
    Return ``(prefix, value)`` for the longest key of this DAWG that is
    a prefix of ``key``.

    ``prefix`` is the matched key as unicode and ``value`` is the value
    the dictionary stores at the matched position.

    Raises ``KeyError`` if no key of this DAWG is a prefix of ``key``.
    '''
    cdef BaseType index = self.dct.root()
    cdef BaseType lastindex = index
    # Walk the encoded bytes, not the unicode code points: a code point
    # squeezed into a single CharType cannot represent non-ASCII text.
    cdef bytes b_key = <bytes>key.encode('utf8')
    cdef int pos = 1       # length in bytes of the prefix consumed so far
    cdef int lastpos = 0   # byte length of the longest match; 0 means none
    cdef CharType ch

    for ch in b_key:
        if not self.dct.Follow(ch, &index):
            break

        if self.dct.has_value(index):
            lastpos = pos
            lastindex = index
        pos += 1

    if lastpos:
        # lastpos counts bytes, so slice the encoded key and decode it back.
        return b_key[:lastpos].decode('utf8'), self.dct.value(lastindex)
    else:
        raise KeyError("No prefix found")

# FIXME: code duplication.
cdef class IntCompletionDAWG(CompletionDAWG):
Expand Down
14 changes: 13 additions & 1 deletion tests/test_dawg.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,12 @@ def test_unicode_sorting(self):
# if data is sorted according to unicode rules.
dawg.DAWG([key1, key2])


def test_longest_prefix(self):
    """longest_prefix returns the longest stored key that starts the query."""
    d = dawg.DAWG(["a", "as", "asdf"])
    # (query, longest stored key that is a prefix of the query)
    cases = [
        ("a", "a"),
        ("as", "as"),
        ("asd", "as"),
        ("asdf", "asdf"),
    ]
    for query, expected in cases:
        assert d.longest_prefix(query) == expected

class TestIntDAWG(object):

Expand Down Expand Up @@ -148,6 +153,13 @@ def test_int_value_ranges(self):
with pytest.raises(OverflowError):
self.IntDAWG({'f': 2**32-1})

def test_longest_prefix(self):
    """longest_prefix on an IntDAWG yields (matched key, stored int)."""
    # NOTE(review): this builds dawg.IntDAWG directly rather than
    # self.IntDAWG, so a subclass overriding IntDAWG still exercises
    # dawg.IntDAWG here - confirm that is intended.
    d = dawg.IntDAWG([("a", 1), ("as", 2), ("asdf", 3)])
    # (query, expected (matched key, value) pair)
    cases = [
        ("a", ("a", 1)),
        ("as", ("as", 2)),
        ("asd", ("as", 2)),
        ("asdf", ("asdf", 3)),
    ]
    for query, expected in cases:
        assert d.longest_prefix(query) == expected


class TestIntCompletionDAWG(TestIntDAWG):
IntDAWG = dawg.IntCompletionDAWG # checks that all tests for IntDAWG pass
Expand Down
7 changes: 6 additions & 1 deletion tests/test_payload_dawg.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,12 @@ def test_build_error(self):
with pytest.raises(dawg.Error):
self.dawg(payload_separator=b'f')


def test_longest_prefix(self):
    """longest_prefix on a BytesDAWG yields (matched key, list of payloads)."""
    items = [("a", b"a1"), ("a", b"a2"), ("as", b"as"), ("asdf", b"asdf")]
    d = dawg.BytesDAWG(items)
    # (query, expected (matched key, payload list) pair)
    cases = [
        ("a", ("a", [b"a1", b"a2"])),
        ("as", ("as", [b"as"])),
        ("asd", ("as", [b"as"])),
        ("asdf", ("asdf", [b"asdf"])),
    ]
    for query, expected in cases:
        assert d.longest_prefix(query) == expected

class TestRecordDAWG(object):

Expand Down

0 comments on commit 3ce2808

Please sign in to comment.