From 5ed184c8c42c454df0886b3061bfb3a0b52f0068 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 15 Oct 2020 16:09:17 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20dinglehopper:=20Show=20a=20progress?= =?UTF-8?q?bar=20on=20--progress?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 1 + qurator/dinglehopper/cli.py | 6 ++++-- qurator/dinglehopper/config.py | 2 ++ qurator/dinglehopper/edit_distance.py | 4 +++- requirements.txt | 1 + 5 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 qurator/dinglehopper/config.py diff --git a/README.md b/README.md index 662a40b..fd947fe 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,7 @@ Usage: dinglehopper [OPTIONS] GT OCR [REPORT_PREFIX] Options: --metrics / --no-metrics Enable/disable metrics and green/red + --progress Show progress bar --help Show this message and exit. ~~~ diff --git a/qurator/dinglehopper/cli.py b/qurator/dinglehopper/cli.py index 03c35cd..cce9672 100644 --- a/qurator/dinglehopper/cli.py +++ b/qurator/dinglehopper/cli.py @@ -10,7 +10,7 @@ from .word_error_rate import word_error_rate_n, words_normalized from .align import seq_align from .extracted_text import ExtractedText from .ocr_files import extract - +from .config import Config def gen_diff_report(gt_in, ocr_in, css_prefix, joiner, none): gtx = '' @@ -134,7 +134,8 @@ def process(gt, ocr, report_prefix, *, metrics=True): @click.argument('ocr', type=click.Path(exists=True)) @click.argument('report_prefix', type=click.Path(), default='report') @click.option('--metrics/--no-metrics', default=True, help='Enable/disable metrics and green/red') -def main(gt, ocr, report_prefix, metrics): +@click.option('--progress', default=False, is_flag=True, help='Show progress bar') +def main(gt, ocr, report_prefix, metrics, progress): """ Compare the PAGE/ALTO/text document GT against the document OCR. @@ -150,6 +151,7 @@ def main(gt, ocr, report_prefix, metrics): $REPORT_PREFIX defaults to "report". The reports include the character error rate (CER) and the word error rate (WER). """ + Config.progress = progress process(gt, ocr, report_prefix, metrics=metrics) diff --git a/qurator/dinglehopper/config.py b/qurator/dinglehopper/config.py new file mode 100644 index 0000000..27081d4 --- /dev/null +++ b/qurator/dinglehopper/config.py @@ -0,0 +1,2 @@ +class Config: + progress = False diff --git a/qurator/dinglehopper/edit_distance.py b/qurator/dinglehopper/edit_distance.py index ec49338..721296d 100644 --- a/qurator/dinglehopper/edit_distance.py +++ b/qurator/dinglehopper/edit_distance.py @@ -7,8 +7,10 @@ from typing import Sequence, Tuple import numpy as np from multimethod import multimethod from uniseg.graphemecluster import grapheme_clusters +from tqdm import tqdm from .extracted_text import ExtractedText +from .config import Config def levenshtein_matrix(seq1: Sequence, seq2: Sequence): @@ -43,7 +45,7 @@ def _levenshtein_matrix(seq1: Tuple, seq2: Tuple): D[i, 0] = i for j in from_to(1, n): D[0, j] = j - for i in from_to(1, m): + for i in tqdm(from_to(1, m), disable=not Config.progress): for j in from_to(1, n): D[i, j] = min( D[i - 1, j - 1] + 1 * (seq1[i - 1] != seq2[j - 1]), # Same or Substitution diff --git a/requirements.txt b/requirements.txt index c2e47dc..57da857 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ MarkupSafe ocrd >= 2.13.1 attrs multimethod == 1.3 # latest version to officially support Python 3.5 +tqdm \ No newline at end of file