diff --git a/requirements.txt b/requirements.txt index 123187b..653ec59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ click jinja2 lxml -uniseg >= 0.9.1 +uniseg >= 0.8.0 numpy colorama MarkupSafe diff --git a/src/dinglehopper/word_error_rate.py b/src/dinglehopper/word_error_rate.py index ec039b3..f2db504 100644 --- a/src/dinglehopper/word_error_rate.py +++ b/src/dinglehopper/word_error_rate.py @@ -21,10 +21,15 @@ def patch_word_break(): https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/WordBreakProperty.txt """ old_word_break = uniseg.wordbreak.word_break + if hasattr(uniseg.wordbreak, 'Word_Break'): + aletter = uniseg.wordbreak.Word_Break.ALetter + else: + # uniseg<0.9 + aletter = uniseg.wordbreak.WordBreak.ALETTER def new_word_break(c): if 0xE000 <= ord(c) <= 0xF8FF: # Private Use Area - return uniseg.wordbreak.Word_Break.ALetter + return aletter else: return old_word_break(c)