Support Python 3.8.

- CI: add "3.8" to the python-version test matrix.
- pyproject.toml: lower requires-python from >=3.9 to >=3.8.
- requirements.txt: relax the uniseg minimum from 0.9.1 to 0.8.0.
- word_error_rate.py: patch_word_break() resolves the ALetter constant once,
  using uniseg.wordbreak.Word_Break.ALetter when that attribute exists and
  falling back to uniseg.wordbreak.WordBreak.ALETTER otherwise (uniseg<0.9).

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 277d4ba..387f7a2 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -25,7 +25,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
+        python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12", "3.13" ]
 
     runs-on: "ubuntu-latest"
 
diff --git a/pyproject.toml b/pyproject.toml
index 9dabb41..62fae82 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ authors = [
 description = "An OCR evaluation tool"
 readme = "README.md"
 license.file = "LICENSE"
-requires-python = ">=3.9"
+requires-python = ">=3.8"
 keywords = ["qurator", "ocr", "evaluation", "ocr-d"]
 
 dynamic = ["version", "dependencies", "optional-dependencies"]
diff --git a/requirements.txt b/requirements.txt
index 123187b..653ec59 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 click
 jinja2
 lxml
-uniseg >= 0.9.1
+uniseg >= 0.8.0
 numpy
 colorama
 MarkupSafe
diff --git a/src/dinglehopper/word_error_rate.py b/src/dinglehopper/word_error_rate.py
index ec039b3..f2db504 100644
--- a/src/dinglehopper/word_error_rate.py
+++ b/src/dinglehopper/word_error_rate.py
@@ -21,10 +21,15 @@ def patch_word_break():
     https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/WordBreakProperty.txt
     """
     old_word_break = uniseg.wordbreak.word_break
+    if hasattr(uniseg.wordbreak, 'Word_Break'):
+        aletter = uniseg.wordbreak.Word_Break.ALetter
+    else:
+        # uniseg<0.9
+        aletter = uniseg.wordbreak.WordBreak.ALETTER
 
     def new_word_break(c):
         if 0xE000 <= ord(c) <= 0xF8FF:  # Private Use Area
-            return uniseg.wordbreak.Word_Break.ALetter
+            return aletter
         else:
             return old_word_break(c)
 