From 4832d1542fbb95cbf9fa19f893fb1fc5400645af Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Wed, 3 Jan 2024 20:38:49 +0100 Subject: [PATCH 1/4] =?UTF-8?q?=E2=9A=99=20pre-commit:=20Update=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4f3562a..b76b8b6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ repos: - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.10 + rev: v0.1.11 hooks: - args: - --fix From 071766efc2e32d8cd7c9ee0a8633ffdd62513e2e Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Wed, 3 Jan 2024 20:40:06 +0100 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=90=9B=20Use=20Optional=20instead=20o?= =?UTF-8?q?f=20|=20none,=20for=20Python=20<3.10?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dinglehopper/align.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dinglehopper/align.py b/src/dinglehopper/align.py index 1f7957a..c5f12f7 100644 --- a/src/dinglehopper/align.py +++ b/src/dinglehopper/align.py @@ -1,6 +1,7 @@ import math import unicodedata from math import ceil +from typing import Optional from rapidfuzz.distance import Levenshtein @@ -14,7 +15,7 @@ def align(t1, t2): return seq_align(s1, s2) -def score_hint(er: float, n: int) -> int | None: +def score_hint(er: float, n: int) -> Optional[int]: """Calculate RapidFuzz score hint for a given error rate and count. Gives the score hint for the distance functions (= expected distance) or None if From c752793be65bb769fd5f4284182131a49a2beb54 Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Wed, 3 Jan 2024 20:52:07 +0100 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=90=9B=20Use=20typing.List=20instead?= =?UTF-8?q?=20of=20list,=20for=20Python=20<3.9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dinglehopper/character_error_rate.py | 4 ++-- src/dinglehopper/edit_distance.py | 3 ++- src/dinglehopper/extracted_text.py | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/dinglehopper/character_error_rate.py b/src/dinglehopper/character_error_rate.py index 3b8c0cc..c0e3fe1 100644 --- a/src/dinglehopper/character_error_rate.py +++ b/src/dinglehopper/character_error_rate.py @@ -1,5 +1,5 @@ import unicodedata -from typing import Tuple +from typing import Tuple, List from multimethod import multimethod from uniseg.graphemecluster import grapheme_clusters @@ -10,7 +10,7 @@ from .extracted_text import ExtractedText @multimethod def character_error_rate_n( - reference: list[str], compared: list[str] + reference: List[str], compared: List[str] ) -> Tuple[float, int]: """ Compute character error rate. diff --git a/src/dinglehopper/edit_distance.py b/src/dinglehopper/edit_distance.py index ef90d81..8eec5e2 100644 --- a/src/dinglehopper/edit_distance.py +++ b/src/dinglehopper/edit_distance.py @@ -1,4 +1,5 @@ import unicodedata +from typing import List from multimethod import multimethod from rapidfuzz.distance import Levenshtein @@ -8,7 +9,7 @@ from .extracted_text import ExtractedText @multimethod -def distance(seq1: list[str], seq2: list[str]): +def distance(seq1: List[str], seq2: List[str]): """Compute the Levenshtein edit distance between two lists of grapheme clusters. This assumes that the grapheme clusters are already normalized. diff --git a/src/dinglehopper/extracted_text.py b/src/dinglehopper/extracted_text.py index 28678e4..7ef9d1d 100644 --- a/src/dinglehopper/extracted_text.py +++ b/src/dinglehopper/extracted_text.py @@ -4,7 +4,7 @@ import re import unicodedata from contextlib import suppress from itertools import repeat -from typing import Optional +from typing import List, Optional import attr import numpy as np @@ -135,7 +135,7 @@ class ExtractedText: segments = attr.ib(type=Optional[list], converter=attr.converters.optional(list)) joiner = attr.ib(type=Optional[str]) _text = attr.ib(type=Optional[str]) - _grapheme_clusters = attr.ib(type=Optional[list[str]]) + _grapheme_clusters = attr.ib(type=Optional[List[str]]) @segments.validator def check(self, _, value): From 7a192880f1a5ea606108baf92e1d8b1a77f1282d Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Wed, 3 Jan 2024 20:58:24 +0100 Subject: [PATCH 4/4] =?UTF-8?q?=E2=AC=86=20Move=20on=20to=20supporting=20P?= =?UTF-8?q?ython=20>=3D=203.8=20only?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/test.yml | 10 ++-------- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 61dc014..0f8485a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,10 +25,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12" ] + python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] - # For Python 3.6, we need to fall back to Ubuntu 20.04 - runs-on: ${{ matrix.python-version == '3.6' && 'ubuntu-20.04' || 'ubuntu-latest' }} + runs-on: "ubuntu-latest" env: test_results_dir: test-results-${{ matrix.python-version }} @@ -44,11 +43,6 @@ jobs: - name: Update pip run: python3 -m pip install -U pip - - name: Avoid compiling OpenCV and NumPy on Python 3.6 - run: | - if python3 --version | grep -q "Python 3.6"; then - pip install --prefer-binary -U opencv-python-headless numpy - fi - name: Install requirements*.txt run: | for requirements_txt in requirements*.txt; do diff --git a/pyproject.toml b/pyproject.toml index da33b15..ce32d56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ authors = [ ] description = "The OCR evaluation tool" readme = "README.md" -requires-python = ">=3.6" +requires-python = ">=3.8" keywords = ["qurator", "ocr", "evaluation", "ocr-d"] dynamic = ["version", "dependencies", "optional-dependencies"]