mirror of
				https://github.com/qurator-spk/dinglehopper.git
				synced 2025-10-30 17:04:15 +01:00 
			
		
		
		
	🎨 dinglehopper: Expose clearing the Levenshtein cache as a function
This commit is contained in:
		
							parent
							
								
									5cf4eddaeb
								
							
						
					
					
						commit
						ced6504ad0
					
				
					 2 changed files with 13 additions and 4 deletions
				
			
		|  | @ -22,11 +22,11 @@ def levenshtein_matrix(seq1: Sequence, seq2: Sequence): | |||
|     return _levenshtein_matrix(tuple(seq1), tuple(seq2)) | ||||
| 
 | ||||
| 
 | ||||
| @lru_cache() | ||||
| @lru_cache(maxsize=10) | ||||
| def _levenshtein_matrix(seq1: Tuple, seq2: Tuple): | ||||
|     """Compute the matrix commonly computed to produce the Levenshtein distance. | ||||
| 
 | ||||
|     This is a LRU cached function not meant to be used directly. Use levensthein_matrix() instead. | ||||
|     This is a LRU cached function not meant to be used directly. Use levenshtein_matrix() instead. | ||||
|     """ | ||||
|     m = len(seq1) | ||||
|     n = len(seq2) | ||||
|  | @ -60,6 +60,15 @@ def levenshtein(seq1, seq2): | |||
|     return D[m, n] | ||||
| 
 | ||||
| 
 | ||||
| def levenshtein_matrix_cache_clear(): | ||||
|     """Clear internal Levenshtein matrix cache. | ||||
| 
 | ||||
|     You want to do this between different input file pairs to decrease memory | ||||
|     usage by not caching results from prior input files. | ||||
|     """ | ||||
|     _levenshtein_matrix.cache_clear() | ||||
| 
 | ||||
| 
 | ||||
| def distance(s1, s2): | ||||
|     """Compute the Levenshtein edit distance between two Unicode strings | ||||
| 
 | ||||
|  |  | |||
|  | @ -8,7 +8,7 @@ from ocrd_utils import concat_padded, getLogger | |||
| from pkg_resources import resource_string | ||||
| 
 | ||||
| from qurator.dinglehopper.cli import process as cli_process | ||||
| from qurator.dinglehopper.edit_distance import _levenshtein_matrix | ||||
| from qurator.dinglehopper.edit_distance import levenshtein_matrix_cache_clear | ||||
| 
 | ||||
| log = getLogger('processor.OcrdDinglehopperEvaluate') | ||||
| 
 | ||||
|  | @ -64,7 +64,7 @@ class OcrdDinglehopperEvaluate(Processor): | |||
|                      local_filename=report_prefix + report_suffix) | ||||
| 
 | ||||
|             # Clear cache between files | ||||
|             _levenshtein_matrix.cache_clear() | ||||
|             levenshtein_matrix_cache_clear() | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue