diff --git a/qurator/dinglehopper/align.py b/qurator/dinglehopper/align.py index ab44760..87febb7 100644 --- a/qurator/dinglehopper/align.py +++ b/qurator/dinglehopper/align.py @@ -28,16 +28,16 @@ def seq_align(s1, s2): if o: if o[0] == 'insert': - yield (None, s2[j]) + yield None, s2[j] j += 1 elif o[0] == 'delete': - yield (s1[i], None) + yield s1[i], None i += 1 elif o[0] == 'replace': - yield (s1[i], s2[j]) + yield s1[i], s2[j] i += 1 j += 1 else: - yield (s1[i], s2[j]) + yield s1[i], s2[j] i += 1 j += 1 diff --git a/qurator/dinglehopper/edit_distance.py b/qurator/dinglehopper/edit_distance.py index a6643c7..284b676 100644 --- a/qurator/dinglehopper/edit_distance.py +++ b/qurator/dinglehopper/edit_distance.py @@ -8,7 +8,6 @@ import numpy as np from uniseg.graphemecluster import grapheme_clusters - def levenshtein_matrix(seq1: Sequence, seq2: Sequence): """Compute the matrix commonly computed to produce the Levenshtein distance. This is also known as the Wagner-Fischer algorithm. The matrix element at the bottom right contains the desired diff --git a/qurator/dinglehopper/ocr_files.py b/qurator/dinglehopper/ocr_files.py index d3918d1..a048b1e 100644 --- a/qurator/dinglehopper/ocr_files.py +++ b/qurator/dinglehopper/ocr_files.py @@ -1,5 +1,6 @@ from __future__ import division, print_function +from typing import Optional from warnings import warn from lxml import etree as ET @@ -58,21 +59,24 @@ def normalize(text, normalization): # XXX hack -normalize_sbb = lambda t: normalize(t, Normalization.NFC_SBB) +def normalize_sbb(t): + return normalize(t, Normalization.NFC_SBB) @attr.s(frozen=True) class ExtractedTextSegment: - segment_id = attr.ib(type=str) + segment_id = attr.ib(type=Optional[str]) + @segment_id.validator - def check(self, attribute, value): + def check(self, _, value): if value is None: return if not re.match(r'[\w\d_-]+', value): raise ValueError('Malformed segment id "{}"'.format(value)) text = attr.ib(type=str) + @text.validator - def check(self, attribute, value): + def check(self, _, value): if value is not None and normalize(value, self.normalization) != value: raise ValueError('String "{}" is not normalized.'.format(value)) normalization = attr.ib(converter=Normalization, default=Normalization.NFC_SBB) diff --git a/qurator/dinglehopper/tests/test_integ_ocrd_cli.py b/qurator/dinglehopper/tests/test_integ_ocrd_cli.py index 3d78f57..75bb816 100644 --- a/qurator/dinglehopper/tests/test_integ_ocrd_cli.py +++ b/qurator/dinglehopper/tests/test_integ_ocrd_cli.py @@ -1,11 +1,9 @@ import os -import re import shutil import json from pathlib import Path from click.testing import CliRunner -import pytest from .util import working_directory diff --git a/qurator/dinglehopper/tests/util.py b/qurator/dinglehopper/tests/util.py index 52b7506..1f224e5 100644 --- a/qurator/dinglehopper/tests/util.py +++ b/qurator/dinglehopper/tests/util.py @@ -21,8 +21,8 @@ def diffprint(x, y): _diffprint(x, y) -def unzip(l): - return zip(*l) +def unzip(an_iterable_of_tuples): + return zip(*an_iterable_of_tuples) class working_directory: