Refactor tests in preparation for refactoring levenshtein.

pull/48/head
Benjamin Rosemann 4 years ago
parent bd324331e6
commit 11916c2dcf

@@ -2,7 +2,7 @@ from __future__ import division, print_function
 
 import unicodedata
 from functools import partial, lru_cache
-from typing import Sequence, Tuple
+from typing import Sequence, Tuple, List
 
 import numpy as np
 from multimethod import multimethod
@@ -84,7 +84,7 @@ def distance(s1: str, s2: str):
     """
     seq1 = list(grapheme_clusters(unicodedata.normalize("NFC", s1)))
     seq2 = list(grapheme_clusters(unicodedata.normalize("NFC", s2)))
-    return levenshtein(seq1, seq2)
+    return distance(seq1, seq2)
 
 
 @multimethod
@@ -92,6 +92,11 @@ def distance(s1: ExtractedText, s2: ExtractedText):
     return distance(s1.text, s2.text)
 
 
+@multimethod
+def distance(s1: List, s2: List):
+    return levenshtein(s1, s2)
+
+
 def seq_editops(seq1, seq2):
     """
     Return sequence of edit operations transforming one sequence to another.
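
Note: a minimal standalone sketch of the dispatch chain these hunks set up, the str overload reduces its inputs to sequences and re-enters distance(), which the new List overload routes to levenshtein(). The levenshtein body below is a placeholder Wagner-Fischer implementation (not the module's), and grapheme-cluster handling is simplified to list(s).

    # Standalone sketch of the multimethod dispatch above (assumptions:
    # list(s) instead of grapheme clusters, placeholder levenshtein).
    from typing import List

    from multimethod import multimethod


    def levenshtein(seq1, seq2):
        # Placeholder Wagner-Fischer DP, only so the sketch runs on its own.
        prev = list(range(len(seq2) + 1))
        for i, a in enumerate(seq1, 1):
            curr = [i]
            for j, b in enumerate(seq2, 1):
                curr.append(min(prev[j] + 1, curr[j - 1] + 1, prev[j - 1] + (a != b)))
            prev = curr
        return prev[-1]


    @multimethod
    def distance(s1: str, s2: str):
        # The real module normalizes to NFC and splits into grapheme clusters here.
        return distance(list(s1), list(s2))


    @multimethod
    def distance(s1: List, s2: List):
        return levenshtein(s1, s2)


    assert distance("Fnord", "Food") == 2
    assert distance(["a", "ab"], ["a", "c"]) == 1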

@@ -2,33 +2,42 @@ from __future__ import division, print_function
 
 import unicodedata
 
+import pytest
+
 from .. import levenshtein, distance
 
+TEST_PARAMS = "seq1,seq2,expected_dist"
 
-def test_levenshtein():
-    assert levenshtein("a", "a") == 0
-    assert levenshtein("a", "b") == 1
-    assert levenshtein("Foo", "Bar") == 3
-
-    assert levenshtein("", "") == 0
-    assert levenshtein("Foo", "") == 3
-    assert levenshtein("", "Foo") == 3
-
-    assert levenshtein("Foo", "Food") == 1
-    assert levenshtein("Fnord", "Food") == 2
-    assert levenshtein("Müll", "Mull") == 1
-    assert levenshtein("Abstand", "Sand") == 4
+TEST_STRINGS = [
+    ("a", "a", 0),
+    ("a", "b", 1),
+    ("Foo", "Bar", 3),
+    ("", "", 0),
+    ("Foo", "", 3),
+    ("", "Foo", 3),
+    ("Foo", "Food", 1),
+    ("Fnord", "Food", 2),
+    ("Müll", "Mull", 1),
+    ("Abstand", "Sand", 4),
+]
 
+TEST_SEQUENCES = [(["a", "ab"], ["a", "ab", "c"], 1), (["a", "ab"], ["a", "c"], 1)]
 
-def test_levenshtein_other_sequences():
-    assert levenshtein(["a", "ab"], ["a", "ab", "c"]) == 1
-    assert levenshtein(["a", "ab"], ["a", "c"]) == 1
 
+@pytest.mark.parametrize(TEST_PARAMS, [*TEST_STRINGS, *TEST_SEQUENCES])
+def test_distance_sequences(seq1, seq2, expected_dist):
+    dist = distance(seq1, seq2)
+    assert dist == expected_dist
 
-def test_distance():
-    assert distance("Fnord", "Food") == 2
-    assert distance("Müll", "Mull") == 1
 
+@pytest.mark.parametrize(TEST_PARAMS, TEST_STRINGS)
+def test_distance(seq1, seq2, expected_dist):
+    dist = distance(seq1, seq2)
+    assert dist == expected_dist
+
+
+def test_distance_unicode_wide():
     word1 = unicodedata.normalize("NFC", "Schlyñ")
     word2 = unicodedata.normalize("NFD", "Schlyñ")  # Different, decomposed!
     assert distance(word1, word2) == 0
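
Note on the unchanged assertions above: the expected distance of 0 depends on the NFC normalization inside distance(), because the two normal forms differ at the codepoint level. A quick standard-library illustration (no project code involved):

    import unicodedata

    word1 = unicodedata.normalize("NFC", "Schlyñ")  # "ñ" as one precomposed codepoint
    word2 = unicodedata.normalize("NFD", "Schlyñ")  # "n" followed by a combining tilde

    assert word1 != word2                                # raw strings differ
    assert (len(word1), len(word2)) == (6, 7)            # NFD carries the extra combining mark
    assert unicodedata.normalize("NFC", word2) == word1  # equal again after NFC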
