From 53824bf904aae5b816d384d65080df6ab28d6a28 Mon Sep 17 00:00:00 2001
From: Kai Labusch
Date: Fri, 8 Nov 2024 10:54:57 +0100
Subject: [PATCH] fix unicode problem

---
 qurator/sbb_ner/webapp/app.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/qurator/sbb_ner/webapp/app.py b/qurator/sbb_ner/webapp/app.py
index cbeb25b..4bb8bb3 100644
--- a/qurator/sbb_ner/webapp/app.py
+++ b/qurator/sbb_ner/webapp/app.py
@@ -253,7 +253,8 @@ def ner(model_id=None):
             orig_pos = len(output_text + word)
 
             # are we on a word boundary?
-            if orig_pos > 0 and original_word_positions[orig_pos-1] != original_word_positions[orig_pos]:
+            if len(word) > 0 and orig_pos > 0 \
+                    and original_word_positions[orig_pos-1] != original_word_positions[orig_pos]:
 
                 # we are on a word boundary - start a new word ...
                 output_sentence.append({'word': word, 'prediction': word_prediction})