# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
# dinglehopper/qurator/dinglehopper/tests/test_align.py
#
# 109 lines
# 3.7 KiB
# Python

from .util import unzip
from .. import align, seq_align, distance
def test_left_empty():
    """An empty left string pairs every right-hand character with None."""
    pairs = list(align('', 'foo'))
    assert pairs == [(None, char) for char in 'foo']
def test_right_empty():
    """An empty right string pairs every left-hand character with None."""
    pairs = list(align('foo', ''))
    assert pairs == [(char, None) for char in 'foo']
def test_left_longer():
    """A surplus trailing character on the left is paired with None."""
    pairs = list(align('food', 'foo'))
    # The shared prefix aligns character by character; only 'd' is unmatched.
    shared_prefix = [(char, char) for char in 'foo']
    assert pairs == shared_prefix + [('d', None)]
def test_right_longer():
    """A surplus trailing character on the right is paired with None."""
    pairs = list(align('foo', 'food'))
    # The shared prefix aligns character by character; only 'd' is unmatched.
    shared_prefix = [(char, char) for char in 'foo']
    assert pairs == shared_prefix + [(None, 'd')]
def test_some_diff():
    """Differing characters stay paired; the extra right character faces None."""
    pairs = list(align('abcde', 'aaadef'))
    left, right = unzip(pairs)
    # Left side is the full first string followed by one gap.
    assert list(left) == list('abcde') + [None]
    # Right side is the second string with no gaps.
    assert list(right) == list('aaadef')
def test_longer():
    """Align two sentences that differ by a deletion, an insertion and a swap."""
    pairs = list(align('Dies ist eine Tst!', 'Dies ist ein Test.'))

    # Expected alignment, written as two parallel columns (None marks a gap):
    #   left  keeps the 'e' of "eine" and has a gap where "Test" gains its 'e';
    #   right has a gap for the dropped 'e' and ends in '.' instead of '!'.
    left_column = list('Dies ist eine ') + ['T', None, 's', 't', '!']
    right_column = list('Dies ist ein') + [None] + list(' Test.')

    assert pairs == list(zip(left_column, right_column))
def test_completely_different():
    """Two equally long strings with no common characters give five pairs."""
    pairs = list(align('abcde', 'fghij'))
    assert len(pairs) == 5
def test_with_some_fake_ocr_errors():
    """Check the start and end of an alignment against a noisy, junk-padded text."""
    clean = 'Über die vielen Sorgen wegen desselben vergaß'
    noisy = 'SomeJunk MoreJunk Übey die vielen Sorgen wegen AdditionalJunk deffelben vcrgab'
    left, right = unzip(list(align(clean, noisy)))

    # Beginning: the leading junk (18 characters) has no counterpart on the left.
    junk_prefix = 'SomeJunk MoreJunk '
    junk_len = len(junk_prefix)
    assert list(left[:junk_len]) == [None] * junk_len
    assert list(right[:junk_len]) == list(junk_prefix)

    # End: the final characters are paired with each other.
    assert list(left[-1:]) == ['ß']
    assert list(right[-1:]) == ['b']
def test_lines():
    """Align two lists of lines instead of two strings.

    Serves mainly as a usage example for line-wise comparison with seq_align.
    """
    expected_left = ['This is a line.', 'This is another', None, 'And the last line']
    expected_right = ['This is a line.', 'This is another', 'J u n k', 'And the last line']

    aligned = list(seq_align(
        ['This is a line.', 'This is another', 'And the last line'],
        ['This is a line.', 'This is another', 'J u n k', 'And the last line'],
    ))
    left, right = unzip(aligned)

    # The extra 'J u n k' line on the right faces a gap (None) on the left.
    assert list(left) == expected_left
    assert list(right) == expected_right
def test_lines_similar():
    """Test comparing list of lines while using a "weaker equivalence".

    This mainly serves as documentation: elements passed to seq_align only
    need to define equality, so a wrapper class can make "almost equal" lines
    count as matching.
    """

    class SimilarString:
        """String wrapper that compares equal to another wrapper when distance() < 2."""

        def __init__(self, string):
            self._string = string

        def __eq__(self, other):
            # The expected alignments contain None for gaps. Returning
            # NotImplemented for foreign types lets Python fall back to its
            # default comparison, so a misaligned result fails the assertion
            # cleanly instead of raising AttributeError on other._string.
            if not isinstance(other, SimilarString):
                return NotImplemented
            return distance(self._string, other._string) < 2  # XXX NOT the final version

        def __ne__(self, other):
            outcome = self.__eq__(other)
            # Propagate NotImplemented unchanged; negating it would be wrong.
            if outcome is NotImplemented:
                return outcome
            return not outcome

        def __repr__(self):
            return 'SimilarString(\'%s\')' % self._string

        def __hash__(self):
            # NOTE(review): hashes the exact string, so two instances that
            # compare equal under the weak __eq__ may hash differently.
            return hash(self._string)

    result = list(seq_align(
        [SimilarString('This is a line.'), SimilarString('This is another'), SimilarString('And the last line')],
        [SimilarString('This is a ljne.'), SimilarString('This is another'), SimilarString('J u n k'), SimilarString('And the last line')]
    ))
    left, right = unzip(result)

    # 'line.' vs. 'ljne.' differ by one character and therefore still match;
    # the 'J u n k' line has no counterpart and faces a gap (None) on the left.
    assert list(left) == [SimilarString('This is a line.'), SimilarString('This is another'), None, SimilarString('And the last line')]
    assert list(right) == [SimilarString('This is a ljne.'), SimilarString('This is another'), SimilarString('J u n k'), SimilarString('And the last line')]