From 64444dd419c7f758ee7ebb42db3746ee016fab7a Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 17 Apr 2025 16:08:41 +0200 Subject: [PATCH] opt out of 7f8a8dd5 (uniseg update that requires py39) --- requirements.txt | 2 +- src/dinglehopper/word_error_rate.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 123187b..653ec59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ click jinja2 lxml -uniseg >= 0.9.1 +uniseg >= 0.8.0 numpy colorama MarkupSafe diff --git a/src/dinglehopper/word_error_rate.py b/src/dinglehopper/word_error_rate.py index ec039b3..f2db504 100644 --- a/src/dinglehopper/word_error_rate.py +++ b/src/dinglehopper/word_error_rate.py @@ -21,10 +21,15 @@ def patch_word_break(): https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/WordBreakProperty.txt """ old_word_break = uniseg.wordbreak.word_break + if hasattr(uniseg.wordbreak, 'Word_Break'): + aletter = uniseg.wordbreak.Word_Break.ALetter + else: + # uniseg<0.9 + aletter = uniseg.wordbreak.WordBreak.ALETTER def new_word_break(c): if 0xE000 <= ord(c) <= 0xF8FF: # Private Use Area - return uniseg.wordbreak.Word_Break.ALetter + return aletter else: return old_word_break(c)