You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
41 lines
1.2 KiB
Python
41 lines
1.2 KiB
Python
from __future__ import division, print_function
|
|
|
|
import unicodedata
|
|
|
|
from .. import levenshtein, distance
|
|
|
|
|
|
def test_levenshtein():
    """Check ``levenshtein`` on pairs of plain strings.

    Each case is ``(first, second, expected)``; the cases are evaluated
    in the order listed and the first mismatch fails the test.
    """
    cases = (
        # Equal and completely different strings.
        ('a', 'a', 0),
        ('a', 'b', 1),
        ('Foo', 'Bar', 3),
        # Empty string on both sides, then on either side.
        ('', '', 0),
        ('Foo', '', 3),
        ('', 'Foo', 3),
        # Strings of different lengths, including a non-ASCII character.
        ('Foo', 'Food', 1),
        ('Fnord', 'Food', 2),
        ('Müll', 'Mull', 1),
        ('Abstand', 'Sand', 4),
    )
    for first, second, expected in cases:
        assert levenshtein(first, second) == expected
|
|
|
|
|
|
def test_levenshtein_other_sequences():
    """Check ``levenshtein`` with lists of strings instead of strings."""
    # Second list has one extra trailing element.
    with_extra_item = levenshtein(['a', 'ab'], ['a', 'ab', 'c'])
    assert with_extra_item == 1

    # Second list differs from the first in its last element only.
    with_other_item = levenshtein(['a', 'ab'], ['a', 'c'])
    assert with_other_item == 1
|
|
|
|
|
|
def test_distance():
    """Check ``distance`` on plain words and on differently composed Unicode."""
    simple_cases = (
        ('Fnord', 'Food', 2),
        ('Müll', 'Mull', 1),
    )
    for first, second, expected in simple_cases:
        assert distance(first, second) == expected

    # The same word in composed (NFC) and decomposed (NFD) form: the two
    # strings differ, yet the expected distance between them is 0.
    composed = unicodedata.normalize('NFC', 'Schlyñ')
    decomposed = unicodedata.normalize('NFD', 'Schlyñ')
    assert distance(composed, decomposed) == 0

    # Ends with LATIN SMALL LETTER N WITH TILDE, so 6 code points.
    with_n_tilde = 'Schlyñ'
    assert len(with_n_tilde) == 6
    # Ends with LATIN SMALL LETTER M + COMBINING TILDE, so 7 code points.
    with_m_tilde = 'Schlym̃'
    assert len(with_m_tilde) == 7
    assert distance(with_n_tilde, with_m_tilde) == 1
|