From 3eabe5054a2af844b67a0a2977a652555e26385f Mon Sep 17 00:00:00 2001
From: Kai Labusch
Date: Fri, 22 Nov 2019 16:55:13 +0100
Subject: [PATCH] fix NER output; fix BERT Tokenizer

---
 qurator/sbb_ner/webapp/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qurator/sbb_ner/webapp/app.py b/qurator/sbb_ner/webapp/app.py
index 5d116d7..524f3ed 100644
--- a/qurator/sbb_ner/webapp/app.py
+++ b/qurator/sbb_ner/webapp/app.py
@@ -271,7 +271,7 @@ def ner(model_id):
             if token == '[UNK]':
                 orig_pos = len("".join([pred['word'] for pred in output_sentence]))
 
-                output_sentence.append({'word': original_text[orig_pos], 'prediction': 'O'})
+                output_sentence.append({'word': original_text[orig_pos], 'prediction': last_prediction})
                 continue
 
             token = token[2:] if token.startswith('##') else token