mirror of
https://github.com/qurator-spk/sbb_ner.git
synced 2025-07-27 19:59:53 +02:00
fix NER composition bug
This commit is contained in:
parent
b2fdff0d9e
commit
15d99074a9
1 changed file with 7 additions and 0 deletions
|
@@ -260,6 +260,8 @@ def ner(model_id):
|
||||||
for (tokens, word_predictions), (input_sentence, _) in zip(prediction, sentences):
|
for (tokens, word_predictions), (input_sentence, _) in zip(prediction, sentences):
|
||||||
|
|
||||||
original_text = "".join(input_sentence).replace(" ", "")
|
original_text = "".join(input_sentence).replace(" ", "")
|
||||||
|
original_word_positions = \
|
||||||
|
[pos for positions in [[idx] * len(word) for idx, word in enumerate(input_sentence)] for pos in positions]
|
||||||
|
|
||||||
word = ''
|
word = ''
|
||||||
last_prediction = 'O'
|
last_prediction = 'O'
|
||||||
|
@@ -274,8 +276,13 @@ def ner(model_id):
|
||||||
word = ''
|
word = ''
|
||||||
|
|
||||||
if token == '[UNK]':
|
if token == '[UNK]':
|
||||||
|
|
||||||
orig_pos = len("".join([pred['word'] for pred in output_sentence]) + word)
|
orig_pos = len("".join([pred['word'] for pred in output_sentence]) + word)
|
||||||
|
|
||||||
|
if orig_pos > 0 and original_word_positions[orig_pos-1] != original_word_positions[orig_pos]:
|
||||||
|
output_sentence.append({'word': word, 'prediction': last_prediction})
|
||||||
|
word = ''
|
||||||
|
|
||||||
word += original_text[orig_pos]
|
word += original_text[orig_pos]
|
||||||
|
|
||||||
continue
|
continue
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue