mirror of
https://github.com/qurator-spk/sbb_ner.git
synced 2025-06-08 03:40:31 +02:00
fix NER output; fix BERT Tokenizer
This commit is contained in:
parent
3eabe5054a
commit
775d0cd753
1 changed file with 5 additions and 4 deletions
|
@@ -262,16 +262,17 @@ def ner(model_id):
|
||||||
|
|
||||||
for pos, (token, word_pred) in enumerate(zip(tokens, word_predictions)):
|
for pos, (token, word_pred) in enumerate(zip(tokens, word_predictions)):
|
||||||
|
|
||||||
if not token.startswith('##'):
|
if not token.startswith('##') and token != '[UNK]':
|
||||||
if len(word) > 0:
|
if len(word) > 0:
|
||||||
output_sentence.append({'word': word, 'prediction': last_prediction})
|
output_sentence.append({'word': word, 'prediction': last_prediction} )
|
||||||
|
|
||||||
word = ''
|
word = ''
|
||||||
|
|
||||||
if token == '[UNK]':
|
if token == '[UNK]':
|
||||||
orig_pos = len("".join([pred['word'] for pred in output_sentence]))
|
orig_pos = len("".join([pred['word'] for pred in output_sentence]) + word)
|
||||||
|
|
||||||
|
word += original_text[orig_pos]
|
||||||
|
|
||||||
output_sentence.append({'word': original_text[orig_pos], 'prediction': last_prediction})
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
token = token[2:] if token.startswith('##') else token
|
token = token[2:] if token.startswith('##') else token
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue