diff --git a/src/dinglehopper/__init__.py b/src/dinglehopper/__init__.py index 8e58101..fecf0ea 100644 --- a/src/dinglehopper/__init__.py +++ b/src/dinglehopper/__init__.py @@ -1,5 +1,5 @@ -from .ocr_files import * -from .extracted_text import * +from .align import * from .character_error_rate import * +from .extracted_text import * +from .ocr_files import * from .word_error_rate import * -from .align import * diff --git a/src/dinglehopper/align.py b/src/dinglehopper/align.py index cc96891..fbc4d28 100644 --- a/src/dinglehopper/align.py +++ b/src/dinglehopper/align.py @@ -1,6 +1,7 @@ -from .edit_distance import * from rapidfuzz.distance import Levenshtein +from .edit_distance import * + def align(t1, t2): """Align text.""" diff --git a/src/dinglehopper/cli.py b/src/dinglehopper/cli.py index b22aafc..7a8f484 100644 --- a/src/dinglehopper/cli.py +++ b/src/dinglehopper/cli.py @@ -4,15 +4,15 @@ from collections import Counter import click from jinja2 import Environment, FileSystemLoader from markupsafe import escape -from uniseg.graphemecluster import grapheme_clusters from ocrd_utils import initLogging +from uniseg.graphemecluster import grapheme_clusters -from dinglehopper.character_error_rate import character_error_rate_n -from dinglehopper.word_error_rate import word_error_rate_n, words_normalized from dinglehopper.align import seq_align +from dinglehopper.character_error_rate import character_error_rate_n +from dinglehopper.config import Config from dinglehopper.extracted_text import ExtractedText from dinglehopper.ocr_files import extract -from dinglehopper.config import Config +from dinglehopper.word_error_rate import word_error_rate_n, words_normalized def gen_diff_report(gt_in, ocr_in, css_prefix, joiner, none, differences=False): diff --git a/src/dinglehopper/cli_extract.py b/src/dinglehopper/cli_extract.py index 0d4f713..3c35b1b 100644 --- a/src/dinglehopper/cli_extract.py +++ b/src/dinglehopper/cli_extract.py @@ -1,9 +1,7 @@ -import os import click from ocrd_utils import initLogging -from .extracted_text import ExtractedText from .ocr_files import extract diff --git a/src/dinglehopper/cli_line_dirs.py b/src/dinglehopper/cli_line_dirs.py index 950f668..5fc3754 100644 --- a/src/dinglehopper/cli_line_dirs.py +++ b/src/dinglehopper/cli_line_dirs.py @@ -1,20 +1,14 @@ -import os -import sys import itertools +import os import click from jinja2 import Environment, FileSystemLoader -from markupsafe import escape -from uniseg.graphemecluster import grapheme_clusters from ocrd_utils import initLogging from .character_error_rate import character_error_rate_n -from .word_error_rate import word_error_rate_n, words_normalized -from .align import seq_align -from .extracted_text import ExtractedText -from .ocr_files import plain_extract -from .config import Config from .cli import gen_diff_report, json_float +from .ocr_files import plain_extract +from .word_error_rate import word_error_rate_n, words_normalized def all_equal(iterable): diff --git a/src/dinglehopper/cli_summarize.py b/src/dinglehopper/cli_summarize.py index 1cf1c91..3262371 100644 --- a/src/dinglehopper/cli_summarize.py +++ b/src/dinglehopper/cli_summarize.py @@ -2,8 +2,8 @@ import json import os import click -from ocrd_utils import initLogging from jinja2 import Environment, FileSystemLoader +from ocrd_utils import initLogging from dinglehopper.cli import json_float diff --git a/src/dinglehopper/edit_distance.py b/src/dinglehopper/edit_distance.py index 24f6928..e5194bf 100644 --- a/src/dinglehopper/edit_distance.py +++ b/src/dinglehopper/edit_distance.py @@ -1,17 +1,12 @@ from __future__ import division, print_function import unicodedata -from functools import partial, lru_cache -from typing import Sequence, Tuple -import numpy as np from multimethod import multimethod -from uniseg.graphemecluster import grapheme_clusters -from tqdm import tqdm from rapidfuzz.distance import Levenshtein +from uniseg.graphemecluster import grapheme_clusters from .extracted_text import ExtractedText -from .config import Config @multimethod diff --git a/src/dinglehopper/ocr_files.py b/src/dinglehopper/ocr_files.py index 42a085f..da7b973 100644 --- a/src/dinglehopper/ocr_files.py +++ b/src/dinglehopper/ocr_files.py @@ -3,11 +3,10 @@ from __future__ import division, print_function import os import sys from typing import Iterator -from warnings import warn +import chardet from lxml import etree as ET from lxml.etree import XMLSyntaxError -import chardet from .extracted_text import ExtractedText, normalize_sbb diff --git a/src/dinglehopper/ocrd_cli.py b/src/dinglehopper/ocrd_cli.py index c5f79cd..8eebdc0 100644 --- a/src/dinglehopper/ocrd_cli.py +++ b/src/dinglehopper/ocrd_cli.py @@ -4,7 +4,7 @@ import os import click from ocrd import Processor from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor -from ocrd_utils import getLogger, make_file_id, assert_file_grp_cardinality +from ocrd_utils import assert_file_grp_cardinality, getLogger, make_file_id from pkg_resources import resource_string from .cli import process as cli_process diff --git a/src/dinglehopper/tests/extracted_text_test.py b/src/dinglehopper/tests/extracted_text_test.py index 8a81587..ae85735 100644 --- a/src/dinglehopper/tests/extracted_text_test.py +++ b/src/dinglehopper/tests/extracted_text_test.py @@ -6,7 +6,7 @@ import pytest from lxml import etree as ET from uniseg.graphemecluster import grapheme_clusters -from .. import seq_align, ExtractedText +from .. import ExtractedText, seq_align def test_text(): diff --git a/src/dinglehopper/tests/test_align.py b/src/dinglehopper/tests/test_align.py index 96fc3c2..d8b051b 100644 --- a/src/dinglehopper/tests/test_align.py +++ b/src/dinglehopper/tests/test_align.py @@ -1,6 +1,7 @@ import pytest + +from .. import align, distance, seq_align from .util import unzip -from .. import align, seq_align, distance def test_left_empty(): diff --git a/src/dinglehopper/tests/test_integ_bigger_texts.py b/src/dinglehopper/tests/test_integ_bigger_texts.py index e069485..fd3871f 100644 --- a/src/dinglehopper/tests/test_integ_bigger_texts.py +++ b/src/dinglehopper/tests/test_integ_bigger_texts.py @@ -4,9 +4,8 @@ import os import pytest from lxml import etree as ET -from uniseg.graphemecluster import grapheme_clusters -from .. import character_error_rate, page_text, alto_text +from .. import alto_text, character_error_rate, page_text data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") diff --git a/src/dinglehopper/tests/test_integ_character_error_rate_ocr.py b/src/dinglehopper/tests/test_integ_character_error_rate_ocr.py index e307a84..7755e2d 100644 --- a/src/dinglehopper/tests/test_integ_character_error_rate_ocr.py +++ b/src/dinglehopper/tests/test_integ_character_error_rate_ocr.py @@ -6,7 +6,7 @@ import pytest from lxml import etree as ET from uniseg.graphemecluster import grapheme_clusters -from .. import character_error_rate, page_text, alto_text +from .. import alto_text, character_error_rate, page_text data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") diff --git a/src/dinglehopper/tests/test_integ_cli_dir.py b/src/dinglehopper/tests/test_integ_cli_dir.py index 435b452..7f3196c 100644 --- a/src/dinglehopper/tests/test_integ_cli_dir.py +++ b/src/dinglehopper/tests/test_integ_cli_dir.py @@ -1,6 +1,8 @@ import os + import pytest from ocrd_utils import initLogging + from dinglehopper.cli import process_dir data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") diff --git a/src/dinglehopper/tests/test_integ_cli_valid_json.py b/src/dinglehopper/tests/test_integ_cli_valid_json.py index 7b35bd7..6cbfa0c 100644 --- a/src/dinglehopper/tests/test_integ_cli_valid_json.py +++ b/src/dinglehopper/tests/test_integ_cli_valid_json.py @@ -1,9 +1,9 @@ import json import pytest -from .util import working_directory from ..cli import process +from .util import working_directory @pytest.mark.integration diff --git a/src/dinglehopper/tests/test_integ_differences.py b/src/dinglehopper/tests/test_integ_differences.py index 3590317..19cb9d1 100644 --- a/src/dinglehopper/tests/test_integ_differences.py +++ b/src/dinglehopper/tests/test_integ_differences.py @@ -1,7 +1,9 @@ import json import os + import pytest from ocrd_utils import initLogging + from dinglehopper.cli import process data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") diff --git a/src/dinglehopper/tests/test_integ_edit_distance_ocr.py b/src/dinglehopper/tests/test_integ_edit_distance_ocr.py index 0e1e7da..e01ac76 100644 --- a/src/dinglehopper/tests/test_integ_edit_distance_ocr.py +++ b/src/dinglehopper/tests/test_integ_edit_distance_ocr.py @@ -5,7 +5,7 @@ import os import pytest from lxml import etree as ET -from .. import distance, page_text, alto_text +from .. import alto_text, distance, page_text data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") diff --git a/src/dinglehopper/tests/test_integ_ocrd_cli.py b/src/dinglehopper/tests/test_integ_ocrd_cli.py index 652b850..b30d2b0 100644 --- a/src/dinglehopper/tests/test_integ_ocrd_cli.py +++ b/src/dinglehopper/tests/test_integ_ocrd_cli.py @@ -1,15 +1,14 @@ +import json import os import shutil -import json import sys from pathlib import Path import pytest from click.testing import CliRunner -from .util import working_directory - from ..ocrd_cli import ocrd_dinglehopper +from .util import working_directory data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") diff --git a/src/dinglehopper/tests/test_integ_summarize.py b/src/dinglehopper/tests/test_integ_summarize.py index 0908152..d4a4900 100644 --- a/src/dinglehopper/tests/test_integ_summarize.py +++ b/src/dinglehopper/tests/test_integ_summarize.py @@ -1,8 +1,10 @@ import json import os + import pytest -from .util import working_directory + from .. import cli_summarize +from .util import working_directory expected_cer_avg = (0.05 + 0.10) / 2 expected_wer_avg = (0.15 + 0.20) / 2 diff --git a/src/dinglehopper/tests/test_integ_word_error_rate_ocr.py b/src/dinglehopper/tests/test_integ_word_error_rate_ocr.py index ba865b4..65b2f54 100644 --- a/src/dinglehopper/tests/test_integ_word_error_rate_ocr.py +++ b/src/dinglehopper/tests/test_integ_word_error_rate_ocr.py @@ -5,7 +5,7 @@ import os import pytest from lxml import etree as ET -from .. import word_error_rate, words, page_text, alto_text +from .. import alto_text, page_text, word_error_rate, words data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") diff --git a/src/dinglehopper/tests/test_ocr_files.py b/src/dinglehopper/tests/test_ocr_files.py index 57c3f4a..4a1f485 100644 --- a/src/dinglehopper/tests/test_ocr_files.py +++ b/src/dinglehopper/tests/test_ocr_files.py @@ -1,13 +1,11 @@ import os import re - -import lxml.etree as ET import textwrap -import pytest +import lxml.etree as ET -from .util import working_directory from .. import alto_namespace, alto_text, page_namespace, page_text, plain_text, text +from .util import working_directory data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") diff --git a/src/dinglehopper/tests/util.py b/src/dinglehopper/tests/util.py index 8a735aa..e44ddc0 100644 --- a/src/dinglehopper/tests/util.py +++ b/src/dinglehopper/tests/util.py @@ -1,8 +1,8 @@ +import os from itertools import zip_longest from typing import Iterable import colorama -import os def diffprint(x, y): diff --git a/src/dinglehopper/word_error_rate.py b/src/dinglehopper/word_error_rate.py index 64dc36c..470bf1f 100644 --- a/src/dinglehopper/word_error_rate.py +++ b/src/dinglehopper/word_error_rate.py @@ -1,14 +1,13 @@ from __future__ import division import unicodedata -from typing import Tuple, Iterable -from multimethod import multimethod +from typing import Iterable, Tuple import uniseg.wordbreak - +from multimethod import multimethod from rapidfuzz.distance import Levenshtein -from . import ExtractedText +from . import ExtractedText # Did we patch uniseg.wordbreak.word_break already? word_break_patched = False