You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
45 lines
1.3 KiB
Python
45 lines
1.3 KiB
Python
from __future__ import division, print_function
|
|
|
|
import unicodedata
|
|
|
|
from .. import levenshtein, distance
|
|
|
|
|
|
def test_levenshtein():
    """Check ``levenshtein`` against hand-computed distances for string pairs."""
    expectations = (
        # Identical and fully different inputs.
        ("a", "a", 0),
        ("a", "b", 1),
        ("Foo", "Bar", 3),
        # One or both inputs empty.
        ("", "", 0),
        ("Foo", "", 3),
        ("", "Foo", 3),
        # Mixed insertions, deletions and substitutions.
        ("Foo", "Food", 1),
        ("Fnord", "Food", 2),
        ("Müll", "Mull", 1),
        ("Abstand", "Sand", 4),
    )
    for source, target, expected in expectations:
        assert levenshtein(source, target) == expected
|
|
|
|
|
|
def test_levenshtein_other_sequences():
    """Check that ``levenshtein`` also works on lists of strings."""
    expectations = (
        # Second list has one extra trailing element.
        (["a", "ab"], ["a", "ab", "c"], 1),
        # Second element differs between the lists.
        (["a", "ab"], ["a", "c"], 1),
    )
    for source, target, expected in expectations:
        assert levenshtein(source, target) == expected
|
|
|
|
|
|
def test_distance():
    """Check ``distance`` on plain words and on Unicode composition edge cases."""
    for source, target, expected in (("Fnord", "Food", 2), ("Müll", "Mull", 1)):
        assert distance(source, target) == expected

    # The same word in composed (NFC) and decomposed (NFD) form: the two
    # strings differ in their code points, yet the expected distance is 0.
    composed = unicodedata.normalize("NFC", "Schlyñ")
    decomposed = unicodedata.normalize("NFD", "Schlyñ")
    assert distance(composed, decomposed) == 0

    # Ends with LATIN SMALL LETTER N WITH TILDE, so 6 code points.
    with_n_tilde = "Schlyñ"
    assert len(with_n_tilde) == 6

    # Ends with LATIN SMALL LETTER M + COMBINING TILDE, so 7 code points.
    with_m_tilde = "Schlym̃"
    assert len(with_m_tilde) == 7

    # Despite the differing code-point counts, only one edit is expected.
    assert distance(with_n_tilde, with_m_tilde) == 1
|