From f68c2b90bd959019814b206c3626348560c7e2de Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 12 Dec 2024 19:57:12 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20cli=5Fline=5Fdirs:=20Fix=20word?= =?UTF-8?q?=20differences=20section?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the time of generation of the section, the {gt,ocr}_words generators were drained. Fix by using a list. Fixes gh-124. --- src/dinglehopper/cli_line_dirs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dinglehopper/cli_line_dirs.py b/src/dinglehopper/cli_line_dirs.py index 2cd4fe6..2861d6f 100644 --- a/src/dinglehopper/cli_line_dirs.py +++ b/src/dinglehopper/cli_line_dirs.py @@ -1,6 +1,6 @@ import itertools import os -from typing import Callable, Iterator, Optional, Tuple +from typing import Callable, Iterator, Optional, Tuple, List import click from jinja2 import Environment, FileSystemLoader @@ -127,8 +127,8 @@ def process( for k, (gt_fn, ocr_fn) in enumerate(gt_ocr_files): gt_text = plain_extract(gt_fn, include_filename_in_id=True) ocr_text = plain_extract(ocr_fn, include_filename_in_id=True) - gt_words = words_normalized(gt_text) - ocr_words = words_normalized(ocr_text) + gt_words: List[str] = list(words_normalized(gt_text)) + ocr_words: List[str] = list(words_normalized(ocr_text)) # Compute CER l_cer, l_n_characters = character_error_rate_n(gt_text, ocr_text)