mirror of
				https://github.com/qurator-spk/ocrd_repair_inconsistencies.git
				synced 2025-10-31 00:44:13 +01:00 
			
		
		
		
	✨ Fix line order in regions
This commit is contained in:
		
							parent
							
								
									c7033a5d4d
								
							
						
					
					
						commit
						25437176d4
					
				
					 1 changed files with 16 additions and 0 deletions
				
			
		|  | @@ -42,6 +42,21 @@ class RepairInconsistencies(Processor): | |||
|         kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL] | ||||
|         super(RepairInconsistencies, self).__init__(*args, **kwargs) | ||||
| 
 | ||||
|     def _fix_lines(self, region): | ||||
|         """Fix line order in a region""" | ||||
| 
 | ||||
|         lines = region.get_TextLine() | ||||
|         region_text = get_text(region) | ||||
|         lines_text = get_text(lines, '\n') | ||||
|         if region_text != lines_text: | ||||
|             # XXX Assumes top-to-bottom | ||||
|             sorted_lines = sorted(lines, key=lambda l: Polygon(polygon_from_points(l.get_Coords().points)).centroid.y) | ||||
|             sorted_lines_text = get_text(sorted_lines, '\n') | ||||
| 
 | ||||
|             if sorted_lines_text == region_text: | ||||
|                 LOG.info('Fixing line order of region "%s"', region.id) | ||||
|                 region.set_TextLine(sorted_lines) | ||||
| 
 | ||||
|     def _fix_words(self, line): | ||||
|         """Fix word order in a line""" | ||||
| 
 | ||||
|  | @@ -82,6 +97,7 @@ class RepairInconsistencies(Processor): | |||
| 
 | ||||
|             regions = page.get_TextRegion() | ||||
|             for region in regions: | ||||
|                 self._fix_lines(region) | ||||
| 
 | ||||
|                 lines = region.get_TextLine() | ||||
|                 for line in lines: | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue