mirror of
				https://github.com/mikegerber/ocrd_calamari.git
				synced 2025-10-31 15:54:13 +01:00 
			
		
		
		
	🐛 Sort predictions in exactly the same way to make sure we are correctly removing spaces
This commit is contained in:
		
							parent
							
								
									d2c843aa3f
								
							
						
					
					
						commit
						0c9e1f13c7
					
				
					 1 changed file with 12 additions and 11 deletions
				
			
		|  | @ -92,8 +92,16 @@ class CalamariRecognize(Processor): | ||||||
|                     # |                     # | ||||||
|                     # XXX Check Calamari's built-in post-processing on prediction.sentence |                     # XXX Check Calamari's built-in post-processing on prediction.sentence | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  |                     def _sort_chars(p): | ||||||
|  |                         """Filter and sort chars of prediction p""" | ||||||
|  |                         chars = p.chars | ||||||
|  |                         chars = [c for c in chars if c.char]  # XXX Note that omission probabilities are not normalized?! | ||||||
|  |                         chars = [c for c in chars if c.probability >= self.parameter['glyph_conf_cutoff']] | ||||||
|  |                         chars = sorted(chars, key=lambda k: k.probability, reverse=True) | ||||||
|  |                         return chars | ||||||
|                     def _drop_leading_spaces(positions): |                     def _drop_leading_spaces(positions): | ||||||
|                         return list(itertools.dropwhile(lambda p: p.chars[0].char == " ", positions)) |                         return list(itertools.dropwhile(lambda p: _sort_chars(p)[0].char == " ", positions)) | ||||||
|                     def _drop_trailing_spaces(positions): |                     def _drop_trailing_spaces(positions): | ||||||
|                         return list(reversed(_drop_leading_spaces(reversed(positions)))) |                         return list(reversed(_drop_leading_spaces(reversed(positions)))) | ||||||
|                     def _drop_double_spaces(positions): |                     def _drop_double_spaces(positions): | ||||||
|  | @ -184,17 +192,10 @@ class CalamariRecognize(Processor): | ||||||
| 
 | 
 | ||||||
|                                         glyph = GlyphType(id='%s_glyph%04d' % (word.id, glyph_no), Coords=CoordsType(points)) |                                         glyph = GlyphType(id='%s_glyph%04d' % (word.id, glyph_no), Coords=CoordsType(points)) | ||||||
| 
 | 
 | ||||||
|                                         # Filter predictions |                                         # Add predictions (= TextEquivs) | ||||||
|                                         chars = p.chars |                                         char_index_start = 1  # Must start with 1, see https://ocr-d.github.io/page#multiple-textequivs | ||||||
|                                         chars = [c for c in chars if c.char]  # XXX Note that omission probabilities are not normalized?! |                                         for char_index, char in enumerate(_sort_chars(p), start=char_index_start): | ||||||
|                                         chars = [c for c in chars if c.probability >= self.parameter['glyph_conf_cutoff']] |  | ||||||
| 
 |  | ||||||
|                                         # Sort and add predictions (= TextEquivs) |  | ||||||
|                                         chars = sorted(chars, key=lambda k: k.probability, reverse=True) |  | ||||||
|                                         char_index = 1  # Must start with 1, see https://ocr-d.github.io/page#multiple-textequivs |  | ||||||
|                                         for char in chars: |  | ||||||
|                                             glyph.add_TextEquiv(TextEquivType(Unicode=char.char, index=char_index, conf=char.probability)) |                                             glyph.add_TextEquiv(TextEquivType(Unicode=char.char, index=char_index, conf=char.probability)) | ||||||
|                                             char_index += 1 |  | ||||||
| 
 | 
 | ||||||
|                                         word.add_Glyph(glyph) |                                         word.add_Glyph(glyph) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue