From a18b25b1633d71b1019e3952eee26c78ba6c2d12 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike" <mike.gerber@sbb.spk-berlin.de>
Date: Fri, 27 Jan 2023 19:13:45 +0100
Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Update=20tests=20for=20Extracted?=
 =?UTF-8?q?Text?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

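In PR gh-72, @maxbachmann introduced a new argument for ExtractedText() that takes
the grapheme clusters of the text. Update the corresponding tests to pass it
(grapheme_clusters(text) for leaf segments, None for composite ones).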
---
 .../dinglehopper/tests/extracted_text_test.py | 31 ++++++++++++-------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/qurator/dinglehopper/tests/extracted_text_test.py b/qurator/dinglehopper/tests/extracted_text_test.py
index 8a81587..bc230d6 100644
--- a/qurator/dinglehopper/tests/extracted_text_test.py
+++ b/qurator/dinglehopper/tests/extracted_text_test.py
@@ -13,12 +13,13 @@ def test_text():
     test1 = ExtractedText(
         None,
         [
-            ExtractedText("s0", None, None, "foo"),
-            ExtractedText("s1", None, None, "bar"),
-            ExtractedText("s2", None, None, "bazinga"),
+            ExtractedText("s0", None, None, "foo", grapheme_clusters("foo")),
+            ExtractedText("s1", None, None, "bar", grapheme_clusters("bar")),
+            ExtractedText("s2", None, None, "bazinga", grapheme_clusters("bazinga")),
         ],
         " ",
         None,
+        None,
     )
 
     assert test1.text == "foo bar bazinga"
@@ -29,8 +30,12 @@ def test_text():
 
 def test_normalization_check():
     with pytest.raises(ValueError, match=r".*is not in NFC.*"):
-        ExtractedText("foo", None, None, unicodedata.normalize("NFD", "Schlyñ"))
-    assert ExtractedText("foo", None, None, unicodedata.normalize("NFC", "Schlyñ"))
+        ExtractedText("foo", None, None,
+                      unicodedata.normalize("NFD", "Schlyñ"),
+                      grapheme_clusters(unicodedata.normalize("NFD", "Schlyñ")))
+    assert ExtractedText("foo", None, None,
+                         unicodedata.normalize("NFC", "Schlyñ"),
+                         grapheme_clusters(unicodedata.normalize("NFC", "Schlyñ")))
 
 
 AlignmentElement = namedtuple("AlignmentElement", "left right left_id right_id")
@@ -47,25 +52,27 @@ def test_align():
     test1 = ExtractedText(
         None,
         [
-            ExtractedText("s0", None, None, "foo"),
-            ExtractedText("s1", None, None, "bar"),
-            ExtractedText("s2", None, None, "batzinga"),
+            ExtractedText("s0", None, None, "foo", grapheme_clusters("foo")),
+            ExtractedText("s1", None, None, "bar", grapheme_clusters("bar")),
+            ExtractedText("s2", None, None, "batzinga", grapheme_clusters("batzinga")),
         ],
         " ",
         None,
+        None,
     )
     test2 = ExtractedText(
         None,
         [
-            ExtractedText("x0", None, None, "foo"),
-            ExtractedText("x1", None, None, "bar"),
+            ExtractedText("x0", None, None, "foo", grapheme_clusters("foo")),
+            ExtractedText("x1", None, None, "bar", grapheme_clusters("bar")),
             # extra .
-            ExtractedText("x2", None, None, "."),
+            ExtractedText("x2", None, None, ".", grapheme_clusters(".")),
             # deletion + different grapheme cluster, m̃ also is two Python characters
-            ExtractedText("x3", None, None, "bazim̃ga"),
+            ExtractedText("x3", None, None, "bazim̃ga", grapheme_clusters("bazim̃ga")),
         ],
         " ",
         None,
+        None,
     )
 
     left_pos = 0