mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-09 11:50:00 +02:00
🚧 dinglehopper: Add word differences in line-dirs report
This commit is contained in:
parent
dbb660615a
commit
cb2be96179
1 changed files with 7 additions and 8 deletions
|
@ -43,6 +43,7 @@ def process(gt_dir, ocr_dir, report_prefix, *, metrics=True):
|
||||||
cer = None
|
cer = None
|
||||||
n_characters = None
|
n_characters = None
|
||||||
char_diff_report = ""
|
char_diff_report = ""
|
||||||
|
word_diff_report = ""
|
||||||
|
|
||||||
for k, gt in enumerate(os.listdir(gt_dir)):
|
for k, gt in enumerate(os.listdir(gt_dir)):
|
||||||
# Find a match by replacing the suffix
|
# Find a match by replacing the suffix
|
||||||
|
@ -68,13 +69,11 @@ def process(gt_dir, ocr_dir, report_prefix, *, metrics=True):
|
||||||
gt_text, ocr_text, css_prefix="l{0}-c".format(k), joiner="", none="·"
|
gt_text, ocr_text, css_prefix="l{0}-c".format(k), joiner="", none="·"
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO
|
gt_words = words_normalized(gt_text)
|
||||||
# gt_words = words_normalized(gt_text)
|
ocr_words = words_normalized(ocr_text)
|
||||||
# ocr_words = words_normalized(ocr_text)
|
word_diff_report += gen_diff_report(
|
||||||
# word_diff_report = gen_diff_report(
|
gt_words, ocr_words, css_prefix="l{0}-w".format(k), joiner=" ", none="⋯"
|
||||||
# gt_words, ocr_words, css_prefix="w", joiner=" ", none="⋯"
|
)
|
||||||
# )
|
|
||||||
word_diff_report = "TODO"
|
|
||||||
|
|
||||||
|
|
||||||
# XXX this is a copy from cli.py
|
# XXX this is a copy from cli.py
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue