mirror of
				https://github.com/qurator-spk/dinglehopper.git
				synced 2025-11-04 11:24:17 +01:00 
			
		
		
		
	✨ dinglehopper: Show a progressbar on --progress
This commit is contained in:
		
							parent
							
								
									4951823a29
								
							
						
					
					
						commit
						5ed184c8c4
					
				
					 5 changed files with 11 additions and 3 deletions
				
			
		| 
						 | 
				
			
			@ -45,6 +45,7 @@ Usage: dinglehopper [OPTIONS] GT OCR [REPORT_PREFIX]
 | 
			
		|||
 | 
			
		||||
Options:
 | 
			
		||||
  --metrics / --no-metrics  Enable/disable metrics and green/red
 | 
			
		||||
  --progress                Show progress bar
 | 
			
		||||
  --help                    Show this message and exit.
 | 
			
		||||
~~~
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -10,7 +10,7 @@ from .word_error_rate import word_error_rate_n, words_normalized
 | 
			
		|||
from .align import seq_align
 | 
			
		||||
from .extracted_text import ExtractedText
 | 
			
		||||
from .ocr_files import extract
 | 
			
		||||
 | 
			
		||||
from .config import Config
 | 
			
		||||
 | 
			
		||||
def gen_diff_report(gt_in, ocr_in, css_prefix, joiner, none):
 | 
			
		||||
    gtx = ''
 | 
			
		||||
| 
						 | 
				
			
			@ -134,7 +134,8 @@ def process(gt, ocr, report_prefix, *, metrics=True):
 | 
			
		|||
@click.argument('ocr', type=click.Path(exists=True))
 | 
			
		||||
@click.argument('report_prefix', type=click.Path(), default='report')
 | 
			
		||||
@click.option('--metrics/--no-metrics', default=True, help='Enable/disable metrics and green/red')
 | 
			
		||||
def main(gt, ocr, report_prefix, metrics):
 | 
			
		||||
@click.option('--progress', default=False, is_flag=True, help='Show progress bar')
 | 
			
		||||
def main(gt, ocr, report_prefix, metrics, progress):
 | 
			
		||||
    """
 | 
			
		||||
    Compare the PAGE/ALTO/text document GT against the document OCR.
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -150,6 +151,7 @@ def main(gt, ocr, report_prefix, metrics):
 | 
			
		|||
    $REPORT_PREFIX defaults to "report". The reports include the character error
 | 
			
		||||
    rate (CER) and the word error rate (WER).
 | 
			
		||||
    """
 | 
			
		||||
    Config.progress = progress
 | 
			
		||||
    process(gt, ocr, report_prefix, metrics=metrics)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										2
									
								
								qurator/dinglehopper/config.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								qurator/dinglehopper/config.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,2 @@
 | 
			
		|||
class Config:
 | 
			
		||||
    progress = False
 | 
			
		||||
| 
						 | 
				
			
			@ -7,8 +7,10 @@ from typing import Sequence, Tuple
 | 
			
		|||
import numpy as np
 | 
			
		||||
from multimethod import multimethod
 | 
			
		||||
from uniseg.graphemecluster import grapheme_clusters
 | 
			
		||||
from tqdm import tqdm
 | 
			
		||||
 | 
			
		||||
from .extracted_text import ExtractedText
 | 
			
		||||
from .config import Config
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def levenshtein_matrix(seq1: Sequence, seq2: Sequence):
 | 
			
		||||
| 
						 | 
				
			
			@ -43,7 +45,7 @@ def _levenshtein_matrix(seq1: Tuple, seq2: Tuple):
 | 
			
		|||
        D[i, 0] = i
 | 
			
		||||
    for j in from_to(1, n):
 | 
			
		||||
        D[0, j] = j
 | 
			
		||||
    for i in from_to(1, m):
 | 
			
		||||
    for i in tqdm(from_to(1, m), disable=not Config.progress):
 | 
			
		||||
        for j in from_to(1, n):
 | 
			
		||||
            D[i, j] = min(
 | 
			
		||||
                D[i - 1, j - 1] + 1 * (seq1[i - 1] != seq2[j - 1]),  # Same or Substitution
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -8,3 +8,4 @@ MarkupSafe
 | 
			
		|||
ocrd >= 2.13.1
 | 
			
		||||
attrs
 | 
			
		||||
multimethod == 1.3  # latest version to officially support Python 3.5
 | 
			
		||||
tqdm
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue