mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-09 11:50:00 +02:00
✨ dinglehopper: Show a progress bar on --progress
This commit is contained in:
parent
4951823a29
commit
5ed184c8c4
5 changed files with 11 additions and 3 deletions
|
@ -45,6 +45,7 @@ Usage: dinglehopper [OPTIONS] GT OCR [REPORT_PREFIX]
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
--metrics / --no-metrics Enable/disable metrics and green/red
|
--metrics / --no-metrics Enable/disable metrics and green/red
|
||||||
|
--progress Show progress bar
|
||||||
--help Show this message and exit.
|
--help Show this message and exit.
|
||||||
~~~
|
~~~
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ from .word_error_rate import word_error_rate_n, words_normalized
|
||||||
from .align import seq_align
|
from .align import seq_align
|
||||||
from .extracted_text import ExtractedText
|
from .extracted_text import ExtractedText
|
||||||
from .ocr_files import extract
|
from .ocr_files import extract
|
||||||
|
from .config import Config
|
||||||
|
|
||||||
def gen_diff_report(gt_in, ocr_in, css_prefix, joiner, none):
|
def gen_diff_report(gt_in, ocr_in, css_prefix, joiner, none):
|
||||||
gtx = ''
|
gtx = ''
|
||||||
|
@ -134,7 +134,8 @@ def process(gt, ocr, report_prefix, *, metrics=True):
|
||||||
@click.argument('ocr', type=click.Path(exists=True))
|
@click.argument('ocr', type=click.Path(exists=True))
|
||||||
@click.argument('report_prefix', type=click.Path(), default='report')
|
@click.argument('report_prefix', type=click.Path(), default='report')
|
||||||
@click.option('--metrics/--no-metrics', default=True, help='Enable/disable metrics and green/red')
|
@click.option('--metrics/--no-metrics', default=True, help='Enable/disable metrics and green/red')
|
||||||
def main(gt, ocr, report_prefix, metrics):
|
@click.option('--progress', default=False, is_flag=True, help='Show progress bar')
|
||||||
|
def main(gt, ocr, report_prefix, metrics, progress):
|
||||||
"""
|
"""
|
||||||
Compare the PAGE/ALTO/text document GT against the document OCR.
|
Compare the PAGE/ALTO/text document GT against the document OCR.
|
||||||
|
|
||||||
|
@ -150,6 +151,7 @@ def main(gt, ocr, report_prefix, metrics):
|
||||||
$REPORT_PREFIX defaults to "report". The reports include the character error
|
$REPORT_PREFIX defaults to "report". The reports include the character error
|
||||||
rate (CER) and the word error rate (WER).
|
rate (CER) and the word error rate (WER).
|
||||||
"""
|
"""
|
||||||
|
Config.progress = progress
|
||||||
process(gt, ocr, report_prefix, metrics=metrics)
|
process(gt, ocr, report_prefix, metrics=metrics)
|
||||||
|
|
||||||
|
|
||||||
|
|
2
qurator/dinglehopper/config.py
Normal file
2
qurator/dinglehopper/config.py
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
class Config:
|
||||||
|
progress = False
|
|
@ -7,8 +7,10 @@ from typing import Sequence, Tuple
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from multimethod import multimethod
|
from multimethod import multimethod
|
||||||
from uniseg.graphemecluster import grapheme_clusters
|
from uniseg.graphemecluster import grapheme_clusters
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
from .extracted_text import ExtractedText
|
from .extracted_text import ExtractedText
|
||||||
|
from .config import Config
|
||||||
|
|
||||||
|
|
||||||
def levenshtein_matrix(seq1: Sequence, seq2: Sequence):
|
def levenshtein_matrix(seq1: Sequence, seq2: Sequence):
|
||||||
|
@ -43,7 +45,7 @@ def _levenshtein_matrix(seq1: Tuple, seq2: Tuple):
|
||||||
D[i, 0] = i
|
D[i, 0] = i
|
||||||
for j in from_to(1, n):
|
for j in from_to(1, n):
|
||||||
D[0, j] = j
|
D[0, j] = j
|
||||||
for i in from_to(1, m):
|
for i in tqdm(from_to(1, m), disable=not Config.progress):
|
||||||
for j in from_to(1, n):
|
for j in from_to(1, n):
|
||||||
D[i, j] = min(
|
D[i, j] = min(
|
||||||
D[i - 1, j - 1] + 1 * (seq1[i - 1] != seq2[j - 1]), # Same or Substitution
|
D[i - 1, j - 1] + 1 * (seq1[i - 1] != seq2[j - 1]), # Same or Substitution
|
||||||
|
|
|
@ -8,3 +8,4 @@ MarkupSafe
|
||||||
ocrd >= 2.13.1
|
ocrd >= 2.13.1
|
||||||
attrs
|
attrs
|
||||||
multimethod == 1.3 # latest version to officially support Python 3.5
|
multimethod == 1.3 # latest version to officially support Python 3.5
|
||||||
|
tqdm
|
Loading…
Add table
Add a link
Reference in a new issue