You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
dinglehopper/qurator/dinglehopper/align.py

44 lines
1018 B
Python

from .edit_distance import *
def align(t1, t2):
    """Align two texts.

    Both texts are normalized to Unicode NFC and split with
    ``grapheme_clusters``; the two resulting lists are then handed to
    ``seq_align``, whose result is returned unchanged.
    """
    clusters1, clusters2 = (
        list(grapheme_clusters(unicodedata.normalize('NFC', text)))
        for text in (t1, t2)
    )
    return seq_align(clusters1, clusters2)
def seq_align(s1, s2):
    """Align two general sequences element by element.

    Walks both sequences in parallel, guided by the edit operations that
    ``seq_editops(s1, s2)`` returns, and lazily yields one pair per
    alignment step:

    * ``(None, b)`` for an ``'insert'`` (``b`` is taken from ``s2`` only),
    * ``(a, None)`` for a ``'delete'`` (``a`` is taken from ``s1`` only),
    * ``(a, b)`` for a ``'replace'`` and for every position that has no
      operation attached, where the elements are paired as they are.

    Each operation is read as ``(name, index_in_s1, index_in_s2)`` and is
    applied once both cursors have reached its indices, so operations are
    expected in the order in which they occur along the sequences.
    """
    s1 = list(s1)
    s2 = list(s2)
    len1 = len(s1)
    len2 = len(s2)

    # Consume the operations through an iterator with a one-element lookahead.
    # Repeatedly slicing off the head (``ops = ops[1:]``) copies the rest of
    # the list for every consumed operation, which is quadratic overall.
    remaining_ops = iter(seq_editops(s1, s2))
    pending = next(remaining_ops, None)

    i = 0
    j = 0
    while i < len1 or j < len2:
        op = None
        if pending is not None and pending[1] == i and pending[2] == j:
            op = pending
            pending = next(remaining_ops, None)

        if op is None:
            # No operation at this position: pair the current elements.
            yield (s1[i], s2[j])
            i += 1
            j += 1
        elif op[0] == 'insert':
            yield (None, s2[j])
            j += 1
        elif op[0] == 'delete':
            yield (s1[i], None)
            i += 1
        elif op[0] == 'replace':
            yield (s1[i], s2[j])
            i += 1
            j += 1
        # Any other operation name is consumed without yielding or advancing.