1
0
Fork 0
mirror of https://github.com/qurator-spk/dinglehopper.git synced 2025-06-09 11:50:00 +02:00

🎨 dinglehopper: Expose clearing the Levenshtein cache as a function

This commit is contained in:
Gerber, Mike 2019-11-20 13:24:45 +01:00
parent 5cf4eddaeb
commit ced6504ad0
2 changed files with 13 additions and 4 deletions

View file

@ -22,11 +22,11 @@ def levenshtein_matrix(seq1: Sequence, seq2: Sequence):
return _levenshtein_matrix(tuple(seq1), tuple(seq2))
@lru_cache()
@lru_cache(maxsize=10)
def _levenshtein_matrix(seq1: Tuple, seq2: Tuple):
"""Compute the matrix commonly computed to produce the Levenshtein distance.
This is a LRU cached function not meant to be used directly. Use levensthein_matrix() instead.
This is a LRU cached function not meant to be used directly. Use levenshtein_matrix() instead.
"""
m = len(seq1)
n = len(seq2)
@ -60,6 +60,15 @@ def levenshtein(seq1, seq2):
return D[m, n]
def levenshtein_matrix_cache_clear():
    """Drop every entry held in the internal Levenshtein matrix cache.

    Call this between different input file pairs: results cached for prior
    input files are then no longer kept around, which decreases memory usage.
    """
    clear_cache = _levenshtein_matrix.cache_clear
    clear_cache()
def distance(s1, s2):
"""Compute the Levenshtein edit distance between two Unicode strings

View file

@ -8,7 +8,7 @@ from ocrd_utils import concat_padded, getLogger
from pkg_resources import resource_string
from qurator.dinglehopper.cli import process as cli_process
from qurator.dinglehopper.edit_distance import _levenshtein_matrix
from qurator.dinglehopper.edit_distance import levenshtein_matrix_cache_clear
log = getLogger('processor.OcrdDinglehopperEvaluate')
@ -64,7 +64,7 @@ class OcrdDinglehopperEvaluate(Processor):
local_filename=report_prefix + report_suffix)
# Clear cache between files
_levenshtein_matrix.cache_clear()
levenshtein_matrix_cache_clear()
if __name__ == '__main__':