From 0f0819512e4e958a88eaf1fa9a0e9a8f13d7c200 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike" <mike.gerber@sbb.spk-berlin.de>
Date: Thu, 2 Mar 2023 10:22:51 +0100
Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=A8=20Reformat=20using=20Black?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 qurator/dinglehopper/cli_line_dirs.py             | 10 +++++++---
 qurator/dinglehopper/ocr_files.py                 | 15 +++++++++++----
 qurator/dinglehopper/ocrd_cli.py                  |  2 +-
 qurator/dinglehopper/tests/test_integ_ocrd_cli.py |  2 +-
 qurator/dinglehopper/word_error_rate.py           |  2 --
 setup.py                                          |  2 +-
 6 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/qurator/dinglehopper/cli_line_dirs.py b/qurator/dinglehopper/cli_line_dirs.py
index 4c07ce5..950f668 100644
--- a/qurator/dinglehopper/cli_line_dirs.py
+++ b/qurator/dinglehopper/cli_line_dirs.py
@@ -32,7 +32,7 @@ def common_suffix(its):
 
 def removesuffix(text, suffix):
     if suffix and text.endswith(suffix):
-        return text[:-len(suffix)]
+        return text[: -len(suffix)]
     return text
 
 
@@ -52,7 +52,9 @@ def process(gt_dir, ocr_dir, report_prefix, *, metrics=True):
         ocr = removesuffix(gt, gt_suffix) + ocr_suffix
 
         gt_text = plain_extract(os.path.join(gt_dir, gt), include_filename_in_id=True)
-        ocr_text = plain_extract(os.path.join(ocr_dir, ocr), include_filename_in_id=True)
+        ocr_text = plain_extract(
+            os.path.join(ocr_dir, ocr), include_filename_in_id=True
+        )
 
         # Compute CER
         l_cer, l_n_characters = character_error_rate_n(gt_text, ocr_text)
@@ -60,7 +62,9 @@ def process(gt_dir, ocr_dir, report_prefix, *, metrics=True):
             cer, n_characters = l_cer, l_n_characters
         else:
             # Rolling update
-            cer = (cer * n_characters + l_cer * l_n_characters) / (n_characters + l_n_characters)
+            cer = (cer * n_characters + l_cer * l_n_characters) / (
+                n_characters + l_n_characters
+            )
             n_characters = n_characters + l_n_characters
 
         # Compute WER
diff --git a/qurator/dinglehopper/ocr_files.py b/qurator/dinglehopper/ocr_files.py
index 69f4df7..97e56ed 100644
--- a/qurator/dinglehopper/ocr_files.py
+++ b/qurator/dinglehopper/ocr_files.py
@@ -98,14 +98,18 @@ def extract_texts_from_reading_order_group(group, tree, nsmap, textequiv_level):
 
         ro_children = filter(lambda child: "index" in child.attrib.keys(), ro_children)
         ro_children = sorted(ro_children, key=lambda child: int(child.attrib["index"]))
-    elif ET.QName(group.tag).localname in ["UnorderedGroup","UnorderedGroupIndexed"]:
+    elif ET.QName(group.tag).localname in ["UnorderedGroup", "UnorderedGroupIndexed"]:
         ro_children = list(group)
     else:
         raise NotImplementedError
 
-
     for ro_child in ro_children:
-        if ET.QName(ro_child.tag).localname in ["OrderedGroup", "OrderedGroupIndexed", "UnorderedGroup", "UnorderedGroupIndexed"]:
+        if ET.QName(ro_child.tag).localname in [
+            "OrderedGroup",
+            "OrderedGroupIndexed",
+            "UnorderedGroup",
+            "UnorderedGroupIndexed",
+        ]:
             regions.extend(
                 extract_texts_from_reading_order_group(
                     ro_child, tree, nsmap, textequiv_level
@@ -139,7 +143,10 @@ def plain_extract(filename, include_filename_in_id=False):
             [
                 ExtractedText(
                     id_template.format(filename=os.path.basename(filename), no=no),
-                    None, None, normalize_sbb(line))
+                    None,
+                    None,
+                    normalize_sbb(line),
+                )
                 for no, line in enumerate(f.readlines())
             ],
             "\n",
diff --git a/qurator/dinglehopper/ocrd_cli.py b/qurator/dinglehopper/ocrd_cli.py
index 7c513e6..9578a0a 100644
--- a/qurator/dinglehopper/ocrd_cli.py
+++ b/qurator/dinglehopper/ocrd_cli.py
@@ -33,7 +33,7 @@ class OcrdDinglehopperEvaluate(Processor):
         textequiv_level = self.parameter["textequiv_level"]
         gt_grp, ocr_grp = self.input_file_grp.split(",")
 
-        input_file_tuples = self.zip_input_files(on_error='abort')
+        input_file_tuples = self.zip_input_files(on_error="abort")
         for n, (gt_file, ocr_file) in enumerate(input_file_tuples):
             if not gt_file or not ocr_file:
                 # file/page was not found in this group
diff --git a/qurator/dinglehopper/tests/test_integ_ocrd_cli.py b/qurator/dinglehopper/tests/test_integ_ocrd_cli.py
index 8aff22d..652b850 100644
--- a/qurator/dinglehopper/tests/test_integ_ocrd_cli.py
+++ b/qurator/dinglehopper/tests/test_integ_ocrd_cli.py
@@ -15,7 +15,7 @@ data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
 
 
 @pytest.mark.integration
-@pytest.mark.skipif(sys.platform == 'win32', reason="only on unix")
+@pytest.mark.skipif(sys.platform == "win32", reason="only on unix")
 def test_ocrd_cli(tmp_path):
     """Test OCR-D interface"""
 
diff --git a/qurator/dinglehopper/word_error_rate.py b/qurator/dinglehopper/word_error_rate.py
index 8f0cc96..64dc36c 100644
--- a/qurator/dinglehopper/word_error_rate.py
+++ b/qurator/dinglehopper/word_error_rate.py
@@ -42,10 +42,8 @@ def words(s: str):
     if not word_break_patched:
         patch_word_break()
 
-
     # Check if c is an unwanted character, i.e. whitespace, punctuation, or similar
     def unwanted(c):
-
         # See https://www.fileformat.info/info/unicode/category/index.htm
         # and https://unicodebook.readthedocs.io/unicode.html#categories
         unwanted_categories = "O", "M", "P", "Z", "S"
diff --git a/setup.py b/setup.py
index be17cc6..646a50f 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ from setuptools import find_packages, setup
 with open("requirements.txt") as fp:
     install_requires = fp.read()
 
-with open('requirements-dev.txt') as fp:
+with open("requirements-dev.txt") as fp:
     tests_require = fp.read()
 
 setup(