From f68c2b90bd959019814b206c3626348560c7e2de Mon Sep 17 00:00:00 2001
From: "Gerber, Mike" <mike.gerber@sbb.spk-berlin.de>
Date: Thu, 12 Dec 2024 19:57:12 +0100
Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20cli=5Fline=5Fdirs:=20Fix=20word?=
 =?UTF-8?q?=20differences=20section?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

By the time the word differences section was generated, the
{gt,ocr}_words generators had already been drained. Fix by converting
them to lists.

Fixes gh-124.
---
 src/dinglehopper/cli_line_dirs.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/dinglehopper/cli_line_dirs.py b/src/dinglehopper/cli_line_dirs.py
index 2cd4fe6..2861d6f 100644
--- a/src/dinglehopper/cli_line_dirs.py
+++ b/src/dinglehopper/cli_line_dirs.py
@@ -1,6 +1,6 @@
 import itertools
 import os
-from typing import Callable, Iterator, Optional, Tuple
+from typing import Callable, Iterator, Optional, Tuple, List
 
 import click
 from jinja2 import Environment, FileSystemLoader
@@ -127,8 +127,8 @@ def process(
     for k, (gt_fn, ocr_fn) in enumerate(gt_ocr_files):
         gt_text = plain_extract(gt_fn, include_filename_in_id=True)
         ocr_text = plain_extract(ocr_fn, include_filename_in_id=True)
-        gt_words = words_normalized(gt_text)
-        ocr_words = words_normalized(ocr_text)
+        gt_words: List[str] = list(words_normalized(gt_text))
+        ocr_words: List[str] = list(words_normalized(ocr_text))
 
         # Compute CER
         l_cer, l_n_characters = character_error_rate_n(gt_text, ocr_text)