1
0
Fork 0
mirror of https://github.com/qurator-spk/dinglehopper.git synced 2025-06-12 21:29:59 +02:00

🐛 cli_line_dirs: Fix word differences section

By the time the word differences section was generated, the gt_words and
ocr_words generators had already been exhausted, so the section came out
empty. Fix by materializing both as lists.

Fixes gh-124.
This commit is contained in:
Gerber, Mike 2024-12-12 19:57:12 +01:00
parent 7dba6a7564
commit f68c2b90bd

View file

@@ -1,6 +1,6 @@
import itertools import itertools
import os import os
from typing import Callable, Iterator, Optional, Tuple from typing import Callable, Iterator, Optional, Tuple, List
import click import click
from jinja2 import Environment, FileSystemLoader from jinja2 import Environment, FileSystemLoader
@@ -127,8 +127,8 @@ def process(
for k, (gt_fn, ocr_fn) in enumerate(gt_ocr_files): for k, (gt_fn, ocr_fn) in enumerate(gt_ocr_files):
gt_text = plain_extract(gt_fn, include_filename_in_id=True) gt_text = plain_extract(gt_fn, include_filename_in_id=True)
ocr_text = plain_extract(ocr_fn, include_filename_in_id=True) ocr_text = plain_extract(ocr_fn, include_filename_in_id=True)
gt_words = words_normalized(gt_text) gt_words: List[str] = list(words_normalized(gt_text))
ocr_words = words_normalized(ocr_text) ocr_words: List[str] = list(words_normalized(ocr_text))
# Compute CER # Compute CER
l_cer, l_n_characters = character_error_rate_n(gt_text, ocr_text) l_cer, l_n_characters = character_error_rate_n(gt_text, ocr_text)