mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-12 21:29:59 +02:00
🐛 cli_line_dirs: Fix word differences section
By the time the word differences section was generated, the {gt,ocr}_words generators had already been drained. Fix by converting them to lists. Fixes gh-124.
This commit is contained in:
parent
7dba6a7564
commit
f68c2b90bd
1 changed file with 3 additions and 3 deletions
|
@ -1,6 +1,6 @@
|
||||||
import itertools
|
import itertools
|
||||||
import os
|
import os
|
||||||
from typing import Callable, Iterator, Optional, Tuple
|
from typing import Callable, Iterator, Optional, Tuple, List
|
||||||
|
|
||||||
import click
|
import click
|
||||||
from jinja2 import Environment, FileSystemLoader
|
from jinja2 import Environment, FileSystemLoader
|
||||||
|
@ -127,8 +127,8 @@ def process(
|
||||||
for k, (gt_fn, ocr_fn) in enumerate(gt_ocr_files):
|
for k, (gt_fn, ocr_fn) in enumerate(gt_ocr_files):
|
||||||
gt_text = plain_extract(gt_fn, include_filename_in_id=True)
|
gt_text = plain_extract(gt_fn, include_filename_in_id=True)
|
||||||
ocr_text = plain_extract(ocr_fn, include_filename_in_id=True)
|
ocr_text = plain_extract(ocr_fn, include_filename_in_id=True)
|
||||||
gt_words = words_normalized(gt_text)
|
gt_words: List[str] = list(words_normalized(gt_text))
|
||||||
ocr_words = words_normalized(ocr_text)
|
ocr_words: List[str] = list(words_normalized(ocr_text))
|
||||||
|
|
||||||
# Compute CER
|
# Compute CER
|
||||||
l_cer, l_n_characters = character_error_rate_n(gt_text, ocr_text)
|
l_cer, l_n_characters = character_error_rate_n(gt_text, ocr_text)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue