import unicodedata

from rapidfuzz.distance import Levenshtein

from .edit_distance import *


def align(t1, t2):
    """Align two texts.

    Both texts are normalized to Unicode NFC and split with
    ``grapheme_clusters`` (a name provided by the ``edit_distance`` star
    import) before being handed to :func:`seq_align`.

    Args:
        t1: First text.
        t2: Second text.

    Returns:
        The generator produced by :func:`seq_align` for the two resulting
        sequences.
    """
    s1 = list(grapheme_clusters(unicodedata.normalize("NFC", t1)))
    s2 = list(grapheme_clusters(unicodedata.normalize("NFC", t2)))
    return seq_align(s1, s2)


def seq_align(s1, s2):
    """Align two general sequences using Levenshtein edit operations.

    Args:
        s1: First (source) iterable; it is materialized into a list.
        s2: Second (destination) iterable; it is materialized into a list.

    Yields:
        ``(left, right)`` pairs walking both sequences from start to end:

        * ``(None, y)`` where ``y`` is inserted from ``s2``,
        * ``(x, None)`` where ``x`` is deleted from ``s1``,
        * ``(x, y)`` for a replacement or for a position without an edit
          operation.

    Raises:
        ValueError: If an edit operation carries a tag other than
            ``"insert"``, ``"delete"`` or ``"replace"``.
    """
    s1 = list(s1)
    s2 = list(s2)
    ops = Levenshtein.editops(s1, s2)
    op_count = len(ops)
    len1 = len(s1)
    len2 = len(s2)

    # Cursor into ``ops``. Advancing an index instead of re-slicing the
    # remaining operations avoids copying them for every consumed operation.
    next_op = 0
    i = 0
    j = 0

    while i < len1 or j < len2:
        # An operation applies only if it is anchored at the current position
        # in both sequences: op[1] is the source index, op[2] the destination
        # index.
        op = None
        if next_op < op_count:
            candidate = ops[next_op]
            if candidate[1] == i and candidate[2] == j:
                op = candidate
                next_op += 1

        if op is None:
            # No edit operation here: the elements are paired as they are.
            yield s1[i], s2[j]
            i += 1
            j += 1
            continue

        tag = op[0]
        if tag == "insert":
            yield None, s2[j]
            j += 1
        elif tag == "delete":
            yield s1[i], None
            i += 1
        elif tag == "replace":
            yield s1[i], s2[j]
            i += 1
            j += 1
        else:
            # Neither index would advance for an unknown tag, so fail loudly
            # instead of looping forever.
            raise ValueError(f"Unexpected edit operation: {tag!r}")