mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-08 11:20:26 +02:00
Merge remote-tracking branch 'origin/master'
This commit is contained in:
commit
b5ceaa8690
2 changed files with 15 additions and 9 deletions
|
@ -11,26 +11,26 @@ repos:
|
||||||
- id: check-ast
|
- id: check-ast
|
||||||
|
|
||||||
- repo: https://github.com/psf/black
|
- repo: https://github.com/psf/black
|
||||||
rev: 23.10.0
|
rev: 23.12.1
|
||||||
hooks:
|
hooks:
|
||||||
- id: black
|
- id: black
|
||||||
|
|
||||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
rev: v0.1.1
|
rev: v0.1.10
|
||||||
hooks:
|
hooks:
|
||||||
- args:
|
- args:
|
||||||
- --fix
|
- --fix
|
||||||
- --exit-non-zero-on-fix
|
- --exit-non-zero-on-fix
|
||||||
id: ruff
|
id: ruff
|
||||||
|
|
||||||
- repo: https://github.com/pre-commit/mirrors-mypy
|
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||||
rev: v1.6.1
|
rev: v1.8.0
|
||||||
hooks:
|
hooks:
|
||||||
- additional_dependencies:
|
- additional_dependencies:
|
||||||
- types-setuptools
|
- types-setuptools
|
||||||
id: mypy
|
id: mypy
|
||||||
|
|
||||||
- repo: https://gitlab.com/vojko.pribudic/pre-commit-update
|
- repo: https://gitlab.com/vojko.pribudic/pre-commit-update
|
||||||
rev: v0.1.0
|
rev: v0.1.1
|
||||||
hooks:
|
hooks:
|
||||||
- id: pre-commit-update
|
- id: pre-commit-update
|
||||||
|
|
|
@ -122,9 +122,11 @@ def process(
|
||||||
|
|
||||||
gt_text = extract(gt, textequiv_level=textequiv_level)
|
gt_text = extract(gt, textequiv_level=textequiv_level)
|
||||||
ocr_text = extract(ocr, textequiv_level=textequiv_level)
|
ocr_text = extract(ocr, textequiv_level=textequiv_level)
|
||||||
gt_words = words_normalized(gt_text)
|
gt_words: list = list(words_normalized(gt_text))
|
||||||
ocr_words = words_normalized(ocr_text)
|
ocr_words: list = list(words_normalized(ocr_text))
|
||||||
|
|
||||||
|
assert isinstance(gt_text, ExtractedText)
|
||||||
|
assert isinstance(ocr_text, ExtractedText)
|
||||||
cer, n_characters = character_error_rate_n(gt_text, ocr_text)
|
cer, n_characters = character_error_rate_n(gt_text, ocr_text)
|
||||||
char_diff_report, diff_c = gen_diff_report(
|
char_diff_report, diff_c = gen_diff_report(
|
||||||
gt_text,
|
gt_text,
|
||||||
|
@ -136,6 +138,10 @@ def process(
|
||||||
differences=differences,
|
differences=differences,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# {gt,ocr}_words must not be a generator, so we don't drain it for the differences
|
||||||
|
# report.
|
||||||
|
assert isinstance(gt_words, list)
|
||||||
|
assert isinstance(ocr_words, list)
|
||||||
wer, n_words = word_error_rate_n(gt_words, ocr_words)
|
wer, n_words = word_error_rate_n(gt_words, ocr_words)
|
||||||
word_diff_report, diff_w = gen_diff_report(
|
word_diff_report, diff_w = gen_diff_report(
|
||||||
gt_words,
|
gt_words,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue