mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-07 19:05:13 +02:00
🎨 dinglehopper: Make PyCharm happier with the type hinting, newlines etc.
This commit is contained in:
parent
e972328e51
commit
5aa74e8383
5 changed files with 14 additions and 13 deletions
|
@@ -28,16 +28,16 @@ def seq_align(s1, s2):
|
||||||
|
|
||||||
if o:
|
if o:
|
||||||
if o[0] == 'insert':
|
if o[0] == 'insert':
|
||||||
yield (None, s2[j])
|
yield None, s2[j]
|
||||||
j += 1
|
j += 1
|
||||||
elif o[0] == 'delete':
|
elif o[0] == 'delete':
|
||||||
yield (s1[i], None)
|
yield s1[i], None
|
||||||
i += 1
|
i += 1
|
||||||
elif o[0] == 'replace':
|
elif o[0] == 'replace':
|
||||||
yield (s1[i], s2[j])
|
yield s1[i], s2[j]
|
||||||
i += 1
|
i += 1
|
||||||
j += 1
|
j += 1
|
||||||
else:
|
else:
|
||||||
yield (s1[i], s2[j])
|
yield s1[i], s2[j]
|
||||||
i += 1
|
i += 1
|
||||||
j += 1
|
j += 1
|
||||||
|
|
|
@@ -8,7 +8,6 @@ import numpy as np
|
||||||
from uniseg.graphemecluster import grapheme_clusters
|
from uniseg.graphemecluster import grapheme_clusters
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def levenshtein_matrix(seq1: Sequence, seq2: Sequence):
|
def levenshtein_matrix(seq1: Sequence, seq2: Sequence):
|
||||||
"""Compute the matrix commonly computed to produce the Levenshtein distance.
|
"""Compute the matrix commonly computed to produce the Levenshtein distance.
|
||||||
This is also known as the Wagner-Fischer algorithm. The matrix element at the bottom right contains the desired
|
This is also known as the Wagner-Fischer algorithm. The matrix element at the bottom right contains the desired
|
||||||
|
|
|
@@ -1,5 +1,6 @@
|
||||||
from __future__ import division, print_function
|
from __future__ import division, print_function
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
from warnings import warn
|
from warnings import warn
|
||||||
|
|
||||||
from lxml import etree as ET
|
from lxml import etree as ET
|
||||||
|
@@ -58,21 +59,24 @@ def normalize(text, normalization):
|
||||||
|
|
||||||
|
|
||||||
# XXX hack
|
# XXX hack
|
||||||
normalize_sbb = lambda t: normalize(t, Normalization.NFC_SBB)
|
def normalize_sbb(t):
|
||||||
|
return normalize(t, Normalization.NFC_SBB)
|
||||||
|
|
||||||
|
|
||||||
@attr.s(frozen=True)
|
@attr.s(frozen=True)
|
||||||
class ExtractedTextSegment:
|
class ExtractedTextSegment:
|
||||||
segment_id = attr.ib(type=str)
|
segment_id = attr.ib(type=Optional[str])
|
||||||
|
|
||||||
@segment_id.validator
|
@segment_id.validator
|
||||||
def check(self, attribute, value):
|
def check(self, _, value):
|
||||||
if value is None:
|
if value is None:
|
||||||
return
|
return
|
||||||
if not re.match(r'[\w\d_-]+', value):
|
if not re.match(r'[\w\d_-]+', value):
|
||||||
raise ValueError('Malformed segment id "{}"'.format(value))
|
raise ValueError('Malformed segment id "{}"'.format(value))
|
||||||
text = attr.ib(type=str)
|
text = attr.ib(type=str)
|
||||||
|
|
||||||
@text.validator
|
@text.validator
|
||||||
def check(self, attribute, value):
|
def check(self, _, value):
|
||||||
if value is not None and normalize(value, self.normalization) != value:
|
if value is not None and normalize(value, self.normalization) != value:
|
||||||
raise ValueError('String "{}" is not normalized.'.format(value))
|
raise ValueError('String "{}" is not normalized.'.format(value))
|
||||||
normalization = attr.ib(converter=Normalization, default=Normalization.NFC_SBB)
|
normalization = attr.ib(converter=Normalization, default=Normalization.NFC_SBB)
|
||||||
|
|
|
@@ -1,11 +1,9 @@
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
import shutil
|
import shutil
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from click.testing import CliRunner
|
from click.testing import CliRunner
|
||||||
import pytest
|
|
||||||
from .util import working_directory
|
from .util import working_directory
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -21,8 +21,8 @@ def diffprint(x, y):
|
||||||
_diffprint(x, y)
|
_diffprint(x, y)
|
||||||
|
|
||||||
|
|
||||||
def unzip(l):
|
def unzip(an_iterable_of_tuples):
|
||||||
return zip(*l)
|
return zip(*an_iterable_of_tuples)
|
||||||
|
|
||||||
|
|
||||||
class working_directory:
|
class working_directory:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue