mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-07-11 11:29:57 +02:00
Revert "Merge branch 'master' of https://github.com/qurator-spk/sbb_textline_detector"
This reverts commit a3c1eee8f31349edcfb1e36920763bcecceb1129, reversing changes made to dc76213ffc1fbabc2c45f0e52ced55449bdf2e83.
This commit is contained in:
parent
1303a7d92f
commit
f94e8b9b1c
73 changed files with 64834 additions and 1686 deletions
48
qurator/dinglehopper/tests/test_editops.py
Normal file
48
qurator/dinglehopper/tests/test_editops.py
Normal file
|
@ -0,0 +1,48 @@
|
|||
import unicodedata
|
||||
|
||||
from .. import seq_editops, editops
|
||||
|
||||
|
||||
def test_trivial():
|
||||
assert seq_editops('abc', 'abc') == []
|
||||
assert seq_editops('', '') == []
|
||||
|
||||
|
||||
def test_insert():
|
||||
assert seq_editops('bc', 'abc') == [('insert', 0, 0)]
|
||||
assert seq_editops('ac', 'abc') == [('insert', 1, 1)]
|
||||
assert seq_editops('ab', 'abc') == [('insert', 2, 2)]
|
||||
assert seq_editops('', 'a') == [('insert', 0, 0)]
|
||||
|
||||
|
||||
def test_multiple():
|
||||
assert seq_editops('bcd', 'abce') == [('insert', 0, 0), ('replace', 2, 3)]
|
||||
|
||||
|
||||
def test_delete():
|
||||
assert seq_editops('abcdef', 'cdef') == [('delete', 0, 0), ('delete', 1, 0)]
|
||||
assert seq_editops('Xabcdef', 'Xcdef') == [('delete', 1, 1), ('delete', 2, 1)]
|
||||
assert seq_editops('abcdefg', 'acdefX') == [('delete', 1, 1), ('replace', 6, 5)]
|
||||
assert seq_editops('abcde', 'aabcd') == [('insert', 1, 1), ('delete', 4, 5)]
|
||||
assert seq_editops('Foo', '') == [('delete', 0, 0), ('delete', 1, 0), ('delete', 2, 0)]
|
||||
assert seq_editops('Foolish', 'Foo') == [('delete', 3, 3), ('delete', 4, 3), ('delete', 5, 3), ('delete', 6, 3)]
|
||||
|
||||
|
||||
def test_ambiguous():
|
||||
assert seq_editops('bcd', 'abcef') == [('insert', 0, 0), ('replace', 2, 3), ('insert', 3, 4)]
|
||||
|
||||
|
||||
def test_editops():
|
||||
"""Test editops() in cases where dealing with grapheme clusters matters"""
|
||||
|
||||
# In these cases, one of the words has a composed form, the other one does not.
|
||||
assert editops('Schlyñ', 'Schlym̃') == [('replace', 5, 5)]
|
||||
assert editops('oͤde', 'öde') == [('replace', 0, 0)]
|
||||
|
||||
|
||||
def test_editops_canonically_equivalent():
|
||||
left = unicodedata.lookup('LATIN SMALL LETTER N') + unicodedata.lookup('COMBINING TILDE')
|
||||
right = unicodedata.lookup('LATIN SMALL LETTER N WITH TILDE')
|
||||
assert left != right
|
||||
assert unicodedata.normalize('NFC', left) == unicodedata.normalize('NFC', right)
|
||||
assert editops(left, right) == []
|
Loading…
Add table
Add a link
Reference in a new issue