mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-07-03 07:29:59 +02:00
🚧 dinglehopper: Display segment id when hovering over a character difference
This commit is contained in:
parent
1f6538b44c
commit
48ad340428
7 changed files with 97 additions and 21 deletions
|
@@ -8,6 +8,7 @@ import numpy as np
|
|||
from uniseg.graphemecluster import grapheme_clusters
|
||||
|
||||
|
||||
|
||||
def levenshtein_matrix(seq1: Sequence, seq2: Sequence):
|
||||
"""Compute the matrix commonly computed to produce the Levenshtein distance.
|
||||
This is also known as the Wagner-Fischer algorithm. The matrix element at the bottom right contains the desired
|
||||
|
@@ -75,6 +76,12 @@ def distance(s1, s2):
|
|||
Note that this is different from levenshtein() as this function knows about Unicode normalization and grapheme
|
||||
clusters. This should be the correct way to compare two Unicode strings.
|
||||
"""
|
||||
# XXX
|
||||
from .cli import ExtractedText
|
||||
if isinstance(s1, ExtractedText):
|
||||
s1 = s1.text
|
||||
if isinstance(s2, ExtractedText):
|
||||
s2 = s2.text
|
||||
s1 = list(grapheme_clusters(unicodedata.normalize('NFC', s1)))
|
||||
s2 = list(grapheme_clusters(unicodedata.normalize('NFC', s2)))
|
||||
return levenshtein(s1, s2)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue