🎨 dinglehopper: Expose clearing the Levenshtein cache as a function

pull/29/head
Gerber, Mike 5 years ago
parent 5cf4eddaeb
commit ced6504ad0

@ -22,11 +22,11 @@ def levenshtein_matrix(seq1: Sequence, seq2: Sequence):
return _levenshtein_matrix(tuple(seq1), tuple(seq2)) return _levenshtein_matrix(tuple(seq1), tuple(seq2))
@lru_cache() @lru_cache(maxsize=10)
def _levenshtein_matrix(seq1: Tuple, seq2: Tuple): def _levenshtein_matrix(seq1: Tuple, seq2: Tuple):
"""Compute the matrix commonly computed to produce the Levenshtein distance. """Compute the matrix commonly computed to produce the Levenshtein distance.
This is a LRU cached function not meant to be used directly. Use levensthein_matrix() instead. This is a LRU cached function not meant to be used directly. Use levenshtein_matrix() instead.
""" """
m = len(seq1) m = len(seq1)
n = len(seq2) n = len(seq2)
@ -60,6 +60,15 @@ def levenshtein(seq1, seq2):
return D[m, n] return D[m, n]
def levenshtein_matrix_cache_clear():
    """Empty the internal cache of computed Levenshtein matrices.

    Intended to be called between different input file pairs, so that
    results cached for prior input files do not keep occupying memory.
    """
    # _levenshtein_matrix is wrapped by lru_cache, which supplies cache_clear().
    _levenshtein_matrix.cache_clear()
def distance(s1, s2): def distance(s1, s2):
"""Compute the Levenshtein edit distance between two Unicode strings """Compute the Levenshtein edit distance between two Unicode strings

@ -8,7 +8,7 @@ from ocrd_utils import concat_padded, getLogger
from pkg_resources import resource_string from pkg_resources import resource_string
from qurator.dinglehopper.cli import process as cli_process from qurator.dinglehopper.cli import process as cli_process
from qurator.dinglehopper.edit_distance import _levenshtein_matrix from qurator.dinglehopper.edit_distance import levenshtein_matrix_cache_clear
log = getLogger('processor.OcrdDinglehopperEvaluate') log = getLogger('processor.OcrdDinglehopperEvaluate')
@ -64,7 +64,7 @@ class OcrdDinglehopperEvaluate(Processor):
local_filename=report_prefix + report_suffix) local_filename=report_prefix + report_suffix)
# Clear cache between files # Clear cache between files
_levenshtein_matrix.cache_clear() levenshtein_matrix_cache_clear()
if __name__ == '__main__': if __name__ == '__main__':

Loading…
Cancel
Save