Replace wildcard imports in the dinglehopper package with explicit imports.

* __init__.py: import each public name explicitly and list it in __all__.
* align.py: import unicodedata directly; import only grapheme_clusters from .edit_distance.
* word_error_rate.py: import ExtractedText from .extracted_text instead of from the package.

diff --git a/src/dinglehopper/__init__.py b/src/dinglehopper/__init__.py
index fecf0ea..0f6ab60 100644
--- a/src/dinglehopper/__init__.py
+++ b/src/dinglehopper/__init__.py
@@ -1,5 +1,32 @@
-from .align import *
-from .character_error_rate import *
-from .extracted_text import *
-from .ocr_files import *
-from .word_error_rate import *
+from .align import align, seq_align
+from .character_error_rate import character_error_rate, character_error_rate_n
+from .edit_distance import distance, editops
+from .extracted_text import ExtractedText
+from .ocr_files import (
+    alto_namespace,
+    alto_text,
+    page_namespace,
+    page_text,
+    plain_text,
+    text,
+)
+from .word_error_rate import word_error_rate, word_error_rate_n, words
+
+__all__ = [
+    "editops",
+    "distance",
+    "align",
+    "seq_align",
+    "character_error_rate",
+    "character_error_rate_n",
+    "word_error_rate",
+    "word_error_rate_n",
+    "words",
+    "ExtractedText",
+    "alto_namespace",
+    "alto_text",
+    "page_namespace",
+    "page_text",
+    "plain_text",
+    "text",
+]
diff --git a/src/dinglehopper/align.py b/src/dinglehopper/align.py
index fbc4d28..988ec9a 100644
--- a/src/dinglehopper/align.py
+++ b/src/dinglehopper/align.py
@@ -1,6 +1,8 @@
+import unicodedata
+
 from rapidfuzz.distance import Levenshtein
 
-from .edit_distance import *
+from .edit_distance import grapheme_clusters
 
 
 def align(t1, t2):
diff --git a/src/dinglehopper/word_error_rate.py b/src/dinglehopper/word_error_rate.py
index 9bf36b6..8a1c9cb 100644
--- a/src/dinglehopper/word_error_rate.py
+++ b/src/dinglehopper/word_error_rate.py
@@ -7,7 +7,7 @@ import uniseg.wordbreak
 from multimethod import multimethod
 from rapidfuzz.distance import Levenshtein
 
-from . import ExtractedText
+from .extracted_text import ExtractedText
 
 # Did we patch uniseg.wordbreak.word_break already?
 word_break_patched = False