Mirror of https://github.com/qurator-spk/sbb_ner.git, synced 2025-11-04 11:34:13 +01:00
			
		
		
		
	fix NER output; fix BERT Tokenizer
This commit is contained in:
		
							parent
							
								
									3eabe5054a
								
							
						
					
					
						commit
						775d0cd753
					
				
					 1 changed file with 5 additions and 4 deletions
				
			
		| 
						 | 
					@@ -262,16 +262,17 @@ def ner(model_id):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for pos, (token, word_pred) in enumerate(zip(tokens, word_predictions)):
 | 
					        for pos, (token, word_pred) in enumerate(zip(tokens, word_predictions)):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if not token.startswith('##'):
 | 
					            if not token.startswith('##') and token != '[UNK]':
 | 
				
			||||||
                if len(word) > 0:
 | 
					                if len(word) > 0:
 | 
				
			||||||
                    output_sentence.append({'word': word, 'prediction': last_prediction})
 | 
					                    output_sentence.append({'word': word, 'prediction': last_prediction} )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                word = ''
 | 
					                word = ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if token == '[UNK]':
 | 
					            if token == '[UNK]':
 | 
				
			||||||
                orig_pos = len("".join([pred['word'] for pred in output_sentence]))
 | 
					                orig_pos = len("".join([pred['word'] for pred in output_sentence]) + word)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                word += original_text[orig_pos]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                output_sentence.append({'word': original_text[orig_pos], 'prediction': last_prediction})
 | 
					 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            token = token[2:] if token.startswith('##') else token
 | 
					            token = token[2:] if token.startswith('##') else token
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue