mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-07 19:05:13 +02:00
Merge pull request #67 from maxbachmann/rapidfuzz
replace usage of deprecated rapidfuzz APIs
This commit is contained in:
commit
85f751aacc
5 changed files with 10 additions and 10 deletions
|
@@ -1,5 +1,5 @@
|
||||||
from .edit_distance import *
|
from .edit_distance import *
|
||||||
from rapidfuzz.string_metric import levenshtein_editops
|
from rapidfuzz.distance import Levenshtein
|
||||||
|
|
||||||
|
|
||||||
def align(t1, t2):
|
def align(t1, t2):
|
||||||
|
@@ -13,7 +13,7 @@ def seq_align(s1, s2):
|
||||||
"""Align general sequences."""
|
"""Align general sequences."""
|
||||||
s1 = list(s1)
|
s1 = list(s1)
|
||||||
s2 = list(s2)
|
s2 = list(s2)
|
||||||
ops = levenshtein_editops(s1, s2)
|
ops = Levenshtein.editops(s1, s2)
|
||||||
i = 0
|
i = 0
|
||||||
j = 0
|
j = 0
|
||||||
|
|
||||||
|
|
|
@@ -8,7 +8,7 @@ import numpy as np
|
||||||
from multimethod import multimethod
|
from multimethod import multimethod
|
||||||
from uniseg.graphemecluster import grapheme_clusters
|
from uniseg.graphemecluster import grapheme_clusters
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from rapidfuzz.string_metric import levenshtein, levenshtein_editops
|
from rapidfuzz.distance import Levenshtein
|
||||||
|
|
||||||
from .extracted_text import ExtractedText
|
from .extracted_text import ExtractedText
|
||||||
from .config import Config
|
from .config import Config
|
||||||
|
@@ -24,7 +24,7 @@ def distance(s1: str, s2: str):
|
||||||
"""
|
"""
|
||||||
seq1 = list(grapheme_clusters(unicodedata.normalize("NFC", s1)))
|
seq1 = list(grapheme_clusters(unicodedata.normalize("NFC", s1)))
|
||||||
seq2 = list(grapheme_clusters(unicodedata.normalize("NFC", s2)))
|
seq2 = list(grapheme_clusters(unicodedata.normalize("NFC", s2)))
|
||||||
return levenshtein(seq1, seq2)
|
return Levenshtein.distance(seq1, seq2)
|
||||||
|
|
||||||
|
|
||||||
@multimethod
|
@multimethod
|
||||||
|
@@ -40,4 +40,4 @@ def editops(word1, word2):
|
||||||
"""
|
"""
|
||||||
word1 = list(grapheme_clusters(unicodedata.normalize("NFC", word1)))
|
word1 = list(grapheme_clusters(unicodedata.normalize("NFC", word1)))
|
||||||
word2 = list(grapheme_clusters(unicodedata.normalize("NFC", word2)))
|
word2 = list(grapheme_clusters(unicodedata.normalize("NFC", word2)))
|
||||||
return levenshtein_editops(word1, word2)
|
return Levenshtein.editops(word1, word2)
|
||||||
|
|
|
@@ -31,7 +31,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from rapidfuzz.string_metric import levenshtein"
|
"from rapidfuzz.distance.Levenshtein import distance as levenshtein"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@@ -227,7 +227,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"from rapidfuzz.string_metric import levenshtein_editops as editops\n",
|
"from rapidfuzz.distance.Levenshtein import editops\n",
|
||||||
"\n",
|
"\n",
|
||||||
"editops('Foo', 'Fon')"
|
"editops('Foo', 'Fon')"
|
||||||
]
|
]
|
||||||
|
|
|
@@ -6,7 +6,7 @@ from multimethod import multimethod
|
||||||
|
|
||||||
import uniseg.wordbreak
|
import uniseg.wordbreak
|
||||||
|
|
||||||
from rapidfuzz.string_metric import levenshtein
|
from rapidfuzz.distance import Levenshtein
|
||||||
from . import ExtractedText
|
from . import ExtractedText
|
||||||
|
|
||||||
|
|
||||||
|
@@ -98,7 +98,7 @@ def word_error_rate_n(reference: Iterable, compared: Iterable) -> Tuple[float, i
|
||||||
reference_seq = list(reference)
|
reference_seq = list(reference)
|
||||||
compared_seq = list(compared)
|
compared_seq = list(compared)
|
||||||
|
|
||||||
d = levenshtein(reference_seq, compared_seq)
|
d = Levenshtein.distance(reference_seq, compared_seq)
|
||||||
n = len(reference_seq)
|
n = len(reference_seq)
|
||||||
|
|
||||||
if d == 0:
|
if d == 0:
|
||||||
|
|
|
@@ -9,5 +9,5 @@ ocrd >= 2.20.1
|
||||||
attrs
|
attrs
|
||||||
multimethod == 1.3 # latest version to officially support Python 3.5
|
multimethod == 1.3 # latest version to officially support Python 3.5
|
||||||
tqdm
|
tqdm
|
||||||
rapidfuzz >= 2.0.5
|
rapidfuzz >= 2.4.2
|
||||||
six # XXX workaround OCR-D/core#730
|
six # XXX workaround OCR-D/core#730
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue