From 4d4ead4cc80075aa415be8beebfe81298a247144 Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Tue, 26 Mar 2024 19:34:22 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fix=20word=20segmentation=20with?= =?UTF-8?q?=20uniseg=200.8.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | 2 +- src/dinglehopper/word_error_rate.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 851fec1..8f863cc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ click jinja2 lxml -uniseg >= 0.7.2 +uniseg >= 0.8.0 numpy colorama MarkupSafe diff --git a/src/dinglehopper/word_error_rate.py b/src/dinglehopper/word_error_rate.py index b6e0a3a..b759a69 100644 --- a/src/dinglehopper/word_error_rate.py +++ b/src/dinglehopper/word_error_rate.py @@ -24,7 +24,7 @@ def patch_word_break(): def new_word_break(c, index=0): if 0xE000 <= ord(c) <= 0xF8FF: # Private Use Area - return "ALetter" + return uniseg.wordbreak.WordBreak.ALETTER else: return old_word_break(c, index)