Revert "Revert "Merge pull request #67 from maxbachmann/rapidfuzz""

This reverts commit 76bd50f1db.
pull/73/head
Gerber, Mike 2 years ago
parent ede9402a6c
commit 15dfbac3a7

@@ -1,5 +1,5 @@
from .edit_distance import * from .edit_distance import *
from rapidfuzz.string_metric import levenshtein_editops from rapidfuzz.distance import Levenshtein
def align(t1, t2): def align(t1, t2):
@@ -13,7 +13,7 @@ def seq_align(s1, s2):
"""Align general sequences.""" """Align general sequences."""
s1 = list(s1) s1 = list(s1)
s2 = list(s2) s2 = list(s2)
ops = levenshtein_editops(s1, s2) ops = Levenshtein.editops(s1, s2)
i = 0 i = 0
j = 0 j = 0

@@ -8,7 +8,7 @@ import numpy as np
from multimethod import multimethod from multimethod import multimethod
from uniseg.graphemecluster import grapheme_clusters from uniseg.graphemecluster import grapheme_clusters
from tqdm import tqdm from tqdm import tqdm
from rapidfuzz.string_metric import levenshtein, levenshtein_editops from rapidfuzz.distance import Levenshtein
from .extracted_text import ExtractedText from .extracted_text import ExtractedText
from .config import Config from .config import Config
@@ -24,7 +24,7 @@ def distance(s1: str, s2: str):
""" """
seq1 = list(grapheme_clusters(unicodedata.normalize("NFC", s1))) seq1 = list(grapheme_clusters(unicodedata.normalize("NFC", s1)))
seq2 = list(grapheme_clusters(unicodedata.normalize("NFC", s2))) seq2 = list(grapheme_clusters(unicodedata.normalize("NFC", s2)))
return levenshtein(seq1, seq2) return Levenshtein.distance(seq1, seq2)
@multimethod @multimethod
@@ -40,4 +40,4 @@ def editops(word1, word2):
""" """
word1 = list(grapheme_clusters(unicodedata.normalize("NFC", word1))) word1 = list(grapheme_clusters(unicodedata.normalize("NFC", word1)))
word2 = list(grapheme_clusters(unicodedata.normalize("NFC", word2))) word2 = list(grapheme_clusters(unicodedata.normalize("NFC", word2)))
return levenshtein_editops(word1, word2) return Levenshtein.editops(word1, word2)

@@ -31,7 +31,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from rapidfuzz.string_metric import levenshtein" "from rapidfuzz.distance.Levenshtein import distance as levenshtein"
] ]
}, },
{ {
@@ -227,7 +227,7 @@
} }
], ],
"source": [ "source": [
"from rapidfuzz.string_metric import levenshtein_editops as editops\n", "from rapidfuzz.distance.Levenshtein import editops\n",
"\n", "\n",
"editops('Foo', 'Fon')" "editops('Foo', 'Fon')"
] ]

@@ -6,7 +6,7 @@ from multimethod import multimethod
import uniseg.wordbreak import uniseg.wordbreak
from rapidfuzz.string_metric import levenshtein from rapidfuzz.distance import Levenshtein
from . import ExtractedText from . import ExtractedText
@@ -98,7 +98,7 @@ def word_error_rate_n(reference: Iterable, compared: Iterable) -> Tuple[float, i
reference_seq = list(reference) reference_seq = list(reference)
compared_seq = list(compared) compared_seq = list(compared)
d = levenshtein(reference_seq, compared_seq) d = Levenshtein.distance(reference_seq, compared_seq)
n = len(reference_seq) n = len(reference_seq)
if d == 0: if d == 0:

@@ -9,5 +9,5 @@ ocrd >= 2.20.1
attrs attrs
multimethod == 1.3 # latest version to officially support Python 3.5 multimethod == 1.3 # latest version to officially support Python 3.5
tqdm tqdm
rapidfuzz >= 2.0.5 rapidfuzz >= 2.4.2
six # XXX workaround OCR-D/core#730 six # XXX workaround OCR-D/core#730

Loading…
Cancel
Save