mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-09 11:50:00 +02:00
🎨 dinglehopper: Expose clearing the Levenshtein cache as a function
This commit is contained in:
parent
5cf4eddaeb
commit
ced6504ad0
2 changed files with 13 additions and 4 deletions
|
@ -22,11 +22,11 @@ def levenshtein_matrix(seq1: Sequence, seq2: Sequence):
|
|||
return _levenshtein_matrix(tuple(seq1), tuple(seq2))
|
||||
|
||||
|
||||
@lru_cache()
|
||||
@lru_cache(maxsize=10)
|
||||
def _levenshtein_matrix(seq1: Tuple, seq2: Tuple):
|
||||
"""Compute the matrix commonly computed to produce the Levenshtein distance.
|
||||
|
||||
This is a LRU cached function not meant to be used directly. Use levensthein_matrix() instead.
|
||||
This is a LRU cached function not meant to be used directly. Use levenshtein_matrix() instead.
|
||||
"""
|
||||
m = len(seq1)
|
||||
n = len(seq2)
|
||||
|
@ -60,6 +60,15 @@ def levenshtein(seq1, seq2):
|
|||
return D[m, n]
|
||||
|
||||
|
||||
def levenshtein_matrix_cache_clear():
    """Empty the internal cache of computed Levenshtein matrices.

    Call this between different input file pairs: results cached for prior
    input files are then dropped, which decreases memory usage.
    """
    clear_cached_matrices = _levenshtein_matrix.cache_clear
    clear_cached_matrices()
|
||||
|
||||
|
||||
def distance(s1, s2):
|
||||
"""Compute the Levenshtein edit distance between two Unicode strings
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ from ocrd_utils import concat_padded, getLogger
|
|||
from pkg_resources import resource_string
|
||||
|
||||
from qurator.dinglehopper.cli import process as cli_process
|
||||
from qurator.dinglehopper.edit_distance import _levenshtein_matrix
|
||||
from qurator.dinglehopper.edit_distance import levenshtein_matrix_cache_clear
|
||||
|
||||
log = getLogger('processor.OcrdDinglehopperEvaluate')
|
||||
|
||||
|
@ -64,7 +64,7 @@ class OcrdDinglehopperEvaluate(Processor):
|
|||
local_filename=report_prefix + report_suffix)
|
||||
|
||||
# Clear cache between files
|
||||
_levenshtein_matrix.cache_clear()
|
||||
levenshtein_matrix_cache_clear()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue