From 5aa74e83831ac9c8b8008fec82a4fa597567e85c Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 12 Jun 2020 20:59:37 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=A8=20dinglehopper:=20Make=20PyCharm?= =?UTF-8?q?=20happier=20with=20the=20type=20hinting,=20newlines=20etc.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/dinglehopper/align.py | 8 ++++---- qurator/dinglehopper/edit_distance.py | 1 - qurator/dinglehopper/ocr_files.py | 12 ++++++++---- qurator/dinglehopper/tests/test_integ_ocrd_cli.py | 2 -- qurator/dinglehopper/tests/util.py | 4 ++-- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/qurator/dinglehopper/align.py b/qurator/dinglehopper/align.py index ab44760..87febb7 100644 --- a/qurator/dinglehopper/align.py +++ b/qurator/dinglehopper/align.py @@ -28,16 +28,16 @@ def seq_align(s1, s2): if o: if o[0] == 'insert': - yield (None, s2[j]) + yield None, s2[j] j += 1 elif o[0] == 'delete': - yield (s1[i], None) + yield s1[i], None i += 1 elif o[0] == 'replace': - yield (s1[i], s2[j]) + yield s1[i], s2[j] i += 1 j += 1 else: - yield (s1[i], s2[j]) + yield s1[i], s2[j] i += 1 j += 1 diff --git a/qurator/dinglehopper/edit_distance.py b/qurator/dinglehopper/edit_distance.py index a6643c7..284b676 100644 --- a/qurator/dinglehopper/edit_distance.py +++ b/qurator/dinglehopper/edit_distance.py @@ -8,7 +8,6 @@ import numpy as np from uniseg.graphemecluster import grapheme_clusters - def levenshtein_matrix(seq1: Sequence, seq2: Sequence): """Compute the matrix commonly computed to produce the Levenshtein distance. This is also known as the Wagner-Fischer algorithm. The matrix element at the bottom right contains the desired diff --git a/qurator/dinglehopper/ocr_files.py b/qurator/dinglehopper/ocr_files.py index d3918d1..a048b1e 100644 --- a/qurator/dinglehopper/ocr_files.py +++ b/qurator/dinglehopper/ocr_files.py @@ -1,5 +1,6 @@ from __future__ import division, print_function +from typing import Optional from warnings import warn from lxml import etree as ET @@ -58,21 +59,24 @@ def normalize(text, normalization): # XXX hack -normalize_sbb = lambda t: normalize(t, Normalization.NFC_SBB) +def normalize_sbb(t): + return normalize(t, Normalization.NFC_SBB) @attr.s(frozen=True) class ExtractedTextSegment: - segment_id = attr.ib(type=str) + segment_id = attr.ib(type=Optional[str]) + @segment_id.validator - def check(self, attribute, value): + def check(self, _, value): if value is None: return if not re.match(r'[\w\d_-]+', value): raise ValueError('Malformed segment id "{}"'.format(value)) text = attr.ib(type=str) + @text.validator - def check(self, attribute, value): + def check(self, _, value): if value is not None and normalize(value, self.normalization) != value: raise ValueError('String "{}" is not normalized.'.format(value)) normalization = attr.ib(converter=Normalization, default=Normalization.NFC_SBB) diff --git a/qurator/dinglehopper/tests/test_integ_ocrd_cli.py b/qurator/dinglehopper/tests/test_integ_ocrd_cli.py index 3d78f57..75bb816 100644 --- a/qurator/dinglehopper/tests/test_integ_ocrd_cli.py +++ b/qurator/dinglehopper/tests/test_integ_ocrd_cli.py @@ -1,11 +1,9 @@ import os -import re import shutil import json from pathlib import Path from click.testing import CliRunner -import pytest from .util import working_directory diff --git a/qurator/dinglehopper/tests/util.py b/qurator/dinglehopper/tests/util.py index 52b7506..1f224e5 100644 --- a/qurator/dinglehopper/tests/util.py +++ b/qurator/dinglehopper/tests/util.py @@ -21,8 +21,8 @@ def diffprint(x, y): _diffprint(x, y) -def unzip(l): - return zip(*l) +def unzip(an_iterable_of_tuples): + return zip(*an_iterable_of_tuples) class working_directory: