🎨 dinglehopper: Make PyCharm happier with the type hinting, newlines etc.

2026-07-04 10:49:16 +02:00 · 2020-06-12 20:59:37 +02:00 · 2020-06-12 20:59:37 +02:00 · 5aa74e8383
commit 5aa74e8383
parent e972328e51
5 changed files with 14 additions and 13 deletions
--- a/qurator/dinglehopper/align.py
+++ b/qurator/dinglehopper/align.py
@@ -28,16 +28,16 @@ def seq_align(s1, s2):
        if o:
            if o[0] == 'insert':
-                yield (None, s2[j])
+                yield None, s2[j]
                j += 1
            elif o[0] == 'delete':
-                yield (s1[i], None)
+                yield s1[i], None
                i += 1
            elif o[0] == 'replace':
-                yield (s1[i], s2[j])
+                yield s1[i], s2[j]
                i += 1
                j += 1
        else:
-            yield (s1[i], s2[j])
+            yield s1[i], s2[j]
            i += 1
            j += 1
--- a/qurator/dinglehopper/edit_distance.py
+++ b/qurator/dinglehopper/edit_distance.py
@@ -8,7 +8,6 @@ import numpy as np
 from uniseg.graphemecluster import grapheme_clusters
 def levenshtein_matrix(seq1: Sequence, seq2: Sequence):
    """Compute the matrix commonly computed to produce the Levenshtein distance.
    This is also known as the Wagner-Fischer algorithm. The matrix element at the bottom right contains the desired
--- a/qurator/dinglehopper/ocr_files.py
+++ b/qurator/dinglehopper/ocr_files.py
@@ -1,5 +1,6 @@
 from __future__ import division, print_function
 from typing import Optional
 from warnings import warn
 from lxml import etree as ET
@@ -58,21 +59,24 @@ def normalize(text, normalization):
 # XXX hack
-normalize_sbb = lambda t: normalize(t, Normalization.NFC_SBB)
+def normalize_sbb(t):
    return normalize(t, Normalization.NFC_SBB)
@attr.s(frozen=True)
 class ExtractedTextSegment:
-    segment_id = attr.ib(type=str)
+    segment_id = attr.ib(type=Optional[str])
    @segment_id.validator
-    def check(self, attribute, value):
+    def check(self, _, value):
        if value is None:
            return
        if not re.match(r'[\w\d_-]+', value):
            raise ValueError('Malformed segment id "{}"'.format(value))
    text = attr.ib(type=str)
    @text.validator
-    def check(self, attribute, value):
+    def check(self, _, value):
        if value is not None and normalize(value, self.normalization) != value:
            raise ValueError('String "{}" is not normalized.'.format(value))
    normalization = attr.ib(converter=Normalization, default=Normalization.NFC_SBB)
--- a/qurator/dinglehopper/tests/test_integ_ocrd_cli.py
+++ b/qurator/dinglehopper/tests/test_integ_ocrd_cli.py
@@ -1,11 +1,9 @@
 import os
 import re
 import shutil
 import json
 from pathlib import Path
 from click.testing import CliRunner
 import pytest
 from .util import working_directory
--- a/qurator/dinglehopper/tests/util.py
+++ b/qurator/dinglehopper/tests/util.py
@@ -21,8 +21,8 @@ def diffprint(x, y):
        _diffprint(x, y)
-def unzip(l):
+def unzip(an_iterable_of_tuples):
-    return zip(*l)
+    return zip(*an_iterable_of_tuples)
 class working_directory: