Mirror of https://github.com/qurator-spk/dinglehopper.git, synced 2025-06-08 19:30:01 +02:00
Refactor tests in preparation of refactoring levenshtein.
parent bd324331e6
commit 11916c2dcf

2 changed files with 33 additions and 19 deletions
Changed file 1 of 2:

@@ -2,7 +2,7 @@ from __future__ import division, print_function
 
 import unicodedata
 from functools import partial, lru_cache
-from typing import Sequence, Tuple
+from typing import Sequence, Tuple, List
 
 import numpy as np
 from multimethod import multimethod
@@ -84,7 +84,7 @@ def distance(s1: str, s2: str):
     """
     seq1 = list(grapheme_clusters(unicodedata.normalize("NFC", s1)))
     seq2 = list(grapheme_clusters(unicodedata.normalize("NFC", s2)))
-    return levenshtein(seq1, seq2)
+    return distance(seq1, seq2)
 
 
 @multimethod
@@ -92,6 +92,11 @@ def distance(s1: ExtractedText, s2: ExtractedText):
     return distance(s1.text, s2.text)
 
 
+@multimethod
+def distance(s1: List, s2: List):
+    return levenshtein(s1, s2)
+
+
 def seq_editops(seq1, seq2):
     """
     Return sequence of edit operations transforming one sequence to another.
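The change in the first file adds a List overload of distance and reroutes the string overload through it: strings are NFC-normalized, split into grapheme clusters, and then compared as plain sequences. Below is a minimal, self-contained sketch of that dispatch pattern, not the project's actual module: plain_levenshtein is a stand-in for the real levenshtein implementation, and the uniseg import is an assumption about how the grapheme splitting is done.

import unicodedata

from multimethod import multimethod
from uniseg.graphemecluster import grapheme_clusters  # assumed source of grapheme_clusters


def plain_levenshtein(seq1, seq2):
    """Textbook dynamic-programming edit distance over two sequences (stand-in)."""
    m, n = len(seq1), len(seq2)
    d = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(m + 1):
        d[i][0] = i
    for j in range(n + 1):
        d[0][j] = j
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            cost = 0 if seq1[i - 1] == seq2[j - 1] else 1
            d[i][j] = min(
                d[i - 1][j] + 1,         # deletion
                d[i][j - 1] + 1,         # insertion
                d[i - 1][j - 1] + cost,  # substitution
            )
    return d[m][n]


@multimethod
def distance(s1: str, s2: str):
    # Normalize, split into grapheme clusters, then dispatch to the list overload below.
    seq1 = list(grapheme_clusters(unicodedata.normalize("NFC", s1)))
    seq2 = list(grapheme_clusters(unicodedata.normalize("NFC", s2)))
    return distance(seq1, seq2)


@multimethod
def distance(s1: list, s2: list):
    # Arbitrary sequences (e.g. token lists) are compared element by element.
    return plain_levenshtein(s1, s2)


assert distance("Fnord", "Food") == 2
assert distance(["a", "ab"], ["a", "ab", "c"]) == 1

Routing the string case through the sequence overload is what lets the tests in the second file feed strings and lists through the same distance entry point.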
Changed file 2 of 2:

@@ -2,33 +2,42 @@ from __future__ import division, print_function
 
 import unicodedata
 
+import pytest
+
 from .. import levenshtein, distance
 
 
-def test_levenshtein():
-    assert levenshtein("a", "a") == 0
-    assert levenshtein("a", "b") == 1
-    assert levenshtein("Foo", "Bar") == 3
+TEST_PARAMS = "seq1,seq2,expected_dist"
 
-    assert levenshtein("", "") == 0
-    assert levenshtein("Foo", "") == 3
-    assert levenshtein("", "Foo") == 3
+TEST_STRINGS = [
+    ("a", "a", 0),
+    ("a", "b", 1),
+    ("Foo", "Bar", 3),
+    ("", "", 0),
+    ("Foo", "", 3),
+    ("", "Foo", 3),
+    ("Foo", "Food", 1),
+    ("Fnord", "Food", 2),
+    ("Müll", "Mull", 1),
+    ("Abstand", "Sand", 4),
+]
 
-    assert levenshtein("Foo", "Food") == 1
-    assert levenshtein("Fnord", "Food") == 2
-    assert levenshtein("Müll", "Mull") == 1
-    assert levenshtein("Abstand", "Sand") == 4
+TEST_SEQUENCES = [(["a", "ab"], ["a", "ab", "c"], 1), (["a", "ab"], ["a", "c"], 1)]
 
 
-def test_levenshtein_other_sequences():
-    assert levenshtein(["a", "ab"], ["a", "ab", "c"]) == 1
-    assert levenshtein(["a", "ab"], ["a", "c"]) == 1
+@pytest.mark.parametrize(TEST_PARAMS, [*TEST_STRINGS, *TEST_SEQUENCES])
+def test_distance_sequences(seq1, seq2, expected_dist):
+    dist = distance(seq1, seq2)
+    assert dist == expected_dist
 
 
-def test_distance():
-    assert distance("Fnord", "Food") == 2
-    assert distance("Müll", "Mull") == 1
+@pytest.mark.parametrize(TEST_PARAMS, TEST_STRINGS)
+def test_distance(seq1, seq2, expected_dist):
+    dist = distance(seq1, seq2)
+    assert dist == expected_dist
 
+
+def test_distance_unicode_wide():
     word1 = unicodedata.normalize("NFC", "Schlyñ")
     word2 = unicodedata.normalize("NFD", "Schlyñ")  # Different, decomposed!
     assert distance(word1, word2) == 0
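The test refactor replaces hand-written assert lists with shared parameter tables, so string and sequence cases run through the same parametrized test bodies. A minimal, standalone sketch of that pattern follows; my_distance is a hypothetical placeholder metric used only so the snippet runs on its own, not dinglehopper's distance.

import pytest

TEST_PARAMS = "seq1,seq2,expected_dist"
TEST_STRINGS = [
    ("a", "a", 0),
    ("Foo", "Bar", 3),
]


def my_distance(a, b):
    # Placeholder metric: differing positions plus the length difference.
    return sum(x != y for x, y in zip(a, b)) + abs(len(a) - len(b))


# Each tuple becomes its own test case; pytest derives ids such as
# test_my_distance[a-a-0] and test_my_distance[Foo-Bar-3].
@pytest.mark.parametrize(TEST_PARAMS, TEST_STRINGS)
def test_my_distance(seq1, seq2, expected_dist):
    assert my_distance(seq1, seq2) == expected_dist

Because every tuple expands into a separate test case under pytest -v, the commit can fold the old per-assert test functions into two parametrized ones without losing any coverage, while the NFC/NFD case keeps its own dedicated test.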