1
0
Fork 0
mirror of https://github.com/qurator-spk/dinglehopper.git synced 2025-07-03 07:29:59 +02:00

🚧 dinglehopper: Display segment id when hovering over a character difference

This commit is contained in:
Gerber, Mike 2020-06-12 13:25:35 +02:00
parent 1f6538b44c
commit 48ad340428
7 changed files with 97 additions and 21 deletions

View file

@ -8,6 +8,7 @@ import numpy as np
from uniseg.graphemecluster import grapheme_clusters
def levenshtein_matrix(seq1: Sequence, seq2: Sequence):
"""Compute the matrix commonly used to produce the Levenshtein distance.
This is also known as the Wagner-Fischer algorithm. The matrix element at the bottom right contains the desired
@ -75,6 +76,12 @@ def distance(s1, s2):
Note that this is different from levenshtein() as this function knows about Unicode normalization and grapheme
clusters. This should be the correct way to compare two Unicode strings.
"""
# XXX: provisional workaround - accept ExtractedText as well as str by unwrapping its .text
from .cli import ExtractedText
if isinstance(s1, ExtractedText):
s1 = s1.text
if isinstance(s2, ExtractedText):
s2 = s2.text
s1 = list(grapheme_clusters(unicodedata.normalize('NFC', s1)))
s2 = list(grapheme_clusters(unicodedata.normalize('NFC', s2)))
return levenshtein(s1, s2)