mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-07-03 23:49:57 +02:00
🎨 Sort imports (auto-fixed by ruff)
This commit is contained in:
parent
5b20fb24a1
commit
d50d624554
23 changed files with 40 additions and 51 deletions
|
@ -1,5 +1,5 @@
|
||||||
from .ocr_files import *
|
|
||||||
from .extracted_text import *
|
|
||||||
from .character_error_rate import *
|
|
||||||
from .word_error_rate import *
|
|
||||||
from .align import *
|
from .align import *
|
||||||
|
from .character_error_rate import *
|
||||||
|
from .extracted_text import *
|
||||||
|
from .ocr_files import *
|
||||||
|
from .word_error_rate import *
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from .edit_distance import *
|
|
||||||
from rapidfuzz.distance import Levenshtein
|
from rapidfuzz.distance import Levenshtein
|
||||||
|
|
||||||
|
from .edit_distance import *
|
||||||
|
|
||||||
|
|
||||||
def align(t1, t2):
|
def align(t1, t2):
|
||||||
"""Align text."""
|
"""Align text."""
|
||||||
|
|
|
@ -4,15 +4,15 @@ from collections import Counter
|
||||||
import click
|
import click
|
||||||
from jinja2 import Environment, FileSystemLoader
|
from jinja2 import Environment, FileSystemLoader
|
||||||
from markupsafe import escape
|
from markupsafe import escape
|
||||||
from uniseg.graphemecluster import grapheme_clusters
|
|
||||||
from ocrd_utils import initLogging
|
from ocrd_utils import initLogging
|
||||||
|
from uniseg.graphemecluster import grapheme_clusters
|
||||||
|
|
||||||
from dinglehopper.character_error_rate import character_error_rate_n
|
|
||||||
from dinglehopper.word_error_rate import word_error_rate_n, words_normalized
|
|
||||||
from dinglehopper.align import seq_align
|
from dinglehopper.align import seq_align
|
||||||
|
from dinglehopper.character_error_rate import character_error_rate_n
|
||||||
|
from dinglehopper.config import Config
|
||||||
from dinglehopper.extracted_text import ExtractedText
|
from dinglehopper.extracted_text import ExtractedText
|
||||||
from dinglehopper.ocr_files import extract
|
from dinglehopper.ocr_files import extract
|
||||||
from dinglehopper.config import Config
|
from dinglehopper.word_error_rate import word_error_rate_n, words_normalized
|
||||||
|
|
||||||
|
|
||||||
def gen_diff_report(gt_in, ocr_in, css_prefix, joiner, none, differences=False):
|
def gen_diff_report(gt_in, ocr_in, css_prefix, joiner, none, differences=False):
|
||||||
|
|
|
@ -1,9 +1,7 @@
|
||||||
import os
|
|
||||||
|
|
||||||
import click
|
import click
|
||||||
from ocrd_utils import initLogging
|
from ocrd_utils import initLogging
|
||||||
|
|
||||||
from .extracted_text import ExtractedText
|
|
||||||
from .ocr_files import extract
|
from .ocr_files import extract
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,20 +1,14 @@
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import itertools
|
import itertools
|
||||||
|
import os
|
||||||
|
|
||||||
import click
|
import click
|
||||||
from jinja2 import Environment, FileSystemLoader
|
from jinja2 import Environment, FileSystemLoader
|
||||||
from markupsafe import escape
|
|
||||||
from uniseg.graphemecluster import grapheme_clusters
|
|
||||||
from ocrd_utils import initLogging
|
from ocrd_utils import initLogging
|
||||||
|
|
||||||
from .character_error_rate import character_error_rate_n
|
from .character_error_rate import character_error_rate_n
|
||||||
from .word_error_rate import word_error_rate_n, words_normalized
|
|
||||||
from .align import seq_align
|
|
||||||
from .extracted_text import ExtractedText
|
|
||||||
from .ocr_files import plain_extract
|
|
||||||
from .config import Config
|
|
||||||
from .cli import gen_diff_report, json_float
|
from .cli import gen_diff_report, json_float
|
||||||
|
from .ocr_files import plain_extract
|
||||||
|
from .word_error_rate import word_error_rate_n, words_normalized
|
||||||
|
|
||||||
|
|
||||||
def all_equal(iterable):
|
def all_equal(iterable):
|
||||||
|
|
|
@ -2,8 +2,8 @@ import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import click
|
import click
|
||||||
from ocrd_utils import initLogging
|
|
||||||
from jinja2 import Environment, FileSystemLoader
|
from jinja2 import Environment, FileSystemLoader
|
||||||
|
from ocrd_utils import initLogging
|
||||||
|
|
||||||
from dinglehopper.cli import json_float
|
from dinglehopper.cli import json_float
|
||||||
|
|
||||||
|
|
|
@ -1,17 +1,12 @@
|
||||||
from __future__ import division, print_function
|
from __future__ import division, print_function
|
||||||
|
|
||||||
import unicodedata
|
import unicodedata
|
||||||
from functools import partial, lru_cache
|
|
||||||
from typing import Sequence, Tuple
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
from multimethod import multimethod
|
from multimethod import multimethod
|
||||||
from uniseg.graphemecluster import grapheme_clusters
|
|
||||||
from tqdm import tqdm
|
|
||||||
from rapidfuzz.distance import Levenshtein
|
from rapidfuzz.distance import Levenshtein
|
||||||
|
from uniseg.graphemecluster import grapheme_clusters
|
||||||
|
|
||||||
from .extracted_text import ExtractedText
|
from .extracted_text import ExtractedText
|
||||||
from .config import Config
|
|
||||||
|
|
||||||
|
|
||||||
@multimethod
|
@multimethod
|
||||||
|
|
|
@ -3,11 +3,10 @@ from __future__ import division, print_function
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from typing import Iterator
|
from typing import Iterator
|
||||||
from warnings import warn
|
|
||||||
|
|
||||||
|
import chardet
|
||||||
from lxml import etree as ET
|
from lxml import etree as ET
|
||||||
from lxml.etree import XMLSyntaxError
|
from lxml.etree import XMLSyntaxError
|
||||||
import chardet
|
|
||||||
|
|
||||||
from .extracted_text import ExtractedText, normalize_sbb
|
from .extracted_text import ExtractedText, normalize_sbb
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@ import os
|
||||||
import click
|
import click
|
||||||
from ocrd import Processor
|
from ocrd import Processor
|
||||||
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
|
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
|
||||||
from ocrd_utils import getLogger, make_file_id, assert_file_grp_cardinality
|
from ocrd_utils import assert_file_grp_cardinality, getLogger, make_file_id
|
||||||
from pkg_resources import resource_string
|
from pkg_resources import resource_string
|
||||||
|
|
||||||
from .cli import process as cli_process
|
from .cli import process as cli_process
|
||||||
|
|
|
@ -6,7 +6,7 @@ import pytest
|
||||||
from lxml import etree as ET
|
from lxml import etree as ET
|
||||||
from uniseg.graphemecluster import grapheme_clusters
|
from uniseg.graphemecluster import grapheme_clusters
|
||||||
|
|
||||||
from .. import seq_align, ExtractedText
|
from .. import ExtractedText, seq_align
|
||||||
|
|
||||||
|
|
||||||
def test_text():
|
def test_text():
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from .. import align, distance, seq_align
|
||||||
from .util import unzip
|
from .util import unzip
|
||||||
from .. import align, seq_align, distance
|
|
||||||
|
|
||||||
|
|
||||||
def test_left_empty():
|
def test_left_empty():
|
||||||
|
|
|
@ -4,9 +4,8 @@ import os
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from lxml import etree as ET
|
from lxml import etree as ET
|
||||||
from uniseg.graphemecluster import grapheme_clusters
|
|
||||||
|
|
||||||
from .. import character_error_rate, page_text, alto_text
|
from .. import alto_text, character_error_rate, page_text
|
||||||
|
|
||||||
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@ import pytest
|
||||||
from lxml import etree as ET
|
from lxml import etree as ET
|
||||||
from uniseg.graphemecluster import grapheme_clusters
|
from uniseg.graphemecluster import grapheme_clusters
|
||||||
|
|
||||||
from .. import character_error_rate, page_text, alto_text
|
from .. import alto_text, character_error_rate, page_text
|
||||||
|
|
||||||
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from ocrd_utils import initLogging
|
from ocrd_utils import initLogging
|
||||||
|
|
||||||
from dinglehopper.cli import process_dir
|
from dinglehopper.cli import process_dir
|
||||||
|
|
||||||
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
import json
|
import json
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from .util import working_directory
|
|
||||||
|
|
||||||
from ..cli import process
|
from ..cli import process
|
||||||
|
from .util import working_directory
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.integration
|
@pytest.mark.integration
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from ocrd_utils import initLogging
|
from ocrd_utils import initLogging
|
||||||
|
|
||||||
from dinglehopper.cli import process
|
from dinglehopper.cli import process
|
||||||
|
|
||||||
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
||||||
|
|
|
@ -5,7 +5,7 @@ import os
|
||||||
import pytest
|
import pytest
|
||||||
from lxml import etree as ET
|
from lxml import etree as ET
|
||||||
|
|
||||||
from .. import distance, page_text, alto_text
|
from .. import alto_text, distance, page_text
|
||||||
|
|
||||||
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
||||||
|
|
||||||
|
|
|
@ -1,15 +1,14 @@
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import json
|
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from click.testing import CliRunner
|
from click.testing import CliRunner
|
||||||
from .util import working_directory
|
|
||||||
|
|
||||||
|
|
||||||
from ..ocrd_cli import ocrd_dinglehopper
|
from ..ocrd_cli import ocrd_dinglehopper
|
||||||
|
from .util import working_directory
|
||||||
|
|
||||||
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from .util import working_directory
|
|
||||||
from .. import cli_summarize
|
from .. import cli_summarize
|
||||||
|
from .util import working_directory
|
||||||
|
|
||||||
expected_cer_avg = (0.05 + 0.10) / 2
|
expected_cer_avg = (0.05 + 0.10) / 2
|
||||||
expected_wer_avg = (0.15 + 0.20) / 2
|
expected_wer_avg = (0.15 + 0.20) / 2
|
||||||
|
|
|
@ -5,7 +5,7 @@ import os
|
||||||
import pytest
|
import pytest
|
||||||
from lxml import etree as ET
|
from lxml import etree as ET
|
||||||
|
|
||||||
from .. import word_error_rate, words, page_text, alto_text
|
from .. import alto_text, page_text, word_error_rate, words
|
||||||
|
|
||||||
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
||||||
|
|
||||||
|
|
|
@ -1,13 +1,11 @@
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import lxml.etree as ET
|
|
||||||
import textwrap
|
import textwrap
|
||||||
|
|
||||||
import pytest
|
import lxml.etree as ET
|
||||||
|
|
||||||
from .util import working_directory
|
|
||||||
from .. import alto_namespace, alto_text, page_namespace, page_text, plain_text, text
|
from .. import alto_namespace, alto_text, page_namespace, page_text, plain_text, text
|
||||||
|
from .util import working_directory
|
||||||
|
|
||||||
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
|
import os
|
||||||
from itertools import zip_longest
|
from itertools import zip_longest
|
||||||
from typing import Iterable
|
from typing import Iterable
|
||||||
|
|
||||||
import colorama
|
import colorama
|
||||||
import os
|
|
||||||
|
|
||||||
|
|
||||||
def diffprint(x, y):
|
def diffprint(x, y):
|
||||||
|
|
|
@ -1,14 +1,13 @@
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
|
|
||||||
import unicodedata
|
import unicodedata
|
||||||
from typing import Tuple, Iterable
|
from typing import Iterable, Tuple
|
||||||
from multimethod import multimethod
|
|
||||||
|
|
||||||
import uniseg.wordbreak
|
import uniseg.wordbreak
|
||||||
|
from multimethod import multimethod
|
||||||
from rapidfuzz.distance import Levenshtein
|
from rapidfuzz.distance import Levenshtein
|
||||||
from . import ExtractedText
|
|
||||||
|
|
||||||
|
from . import ExtractedText
|
||||||
|
|
||||||
# Did we patch uniseg.wordbreak.word_break already?
|
# Did we patch uniseg.wordbreak.word_break already?
|
||||||
word_break_patched = False
|
word_break_patched = False
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue