mirror of
				https://github.com/qurator-spk/ocrd_repair_inconsistencies.git
				synced 2025-10-31 08:54:13 +01:00 
			
		
		
		
	🎨 Methods → functions
This commit is contained in:
		
							parent
							
								
									42484da95a
								
							
						
					
					
						commit
						dd9f1a3093
					
				
					 1 changed files with 53 additions and 51 deletions
				
			
		|  | @ -20,57 +20,13 @@ from .config import OCRD_TOOL | |||
| TOOL = 'ocrd_repair_inconsistencies' | ||||
| LOG = getLogger('processor.RepairInconsistencies') | ||||
| 
 | ||||
| 
 | ||||
| class RepairInconsistencies(Processor): | ||||
| 
 | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL] | ||||
|         super(RepairInconsistencies, self).__init__(*args, **kwargs) | ||||
| 
 | ||||
|     def _fix_lines(self, region): | ||||
|         """Fix line order in a region""" | ||||
| 
 | ||||
|         lines = region.get_TextLine() | ||||
|         region_text = get_text(region) | ||||
|         lines_text = get_text(lines, '\n') | ||||
|         if region_text != lines_text: | ||||
|             # XXX Assumes top-to-bottom | ||||
|             sorted_lines = sorted(lines, key=lambda l: Polygon(polygon_from_points(l.get_Coords().points)).centroid.y) | ||||
|             sorted_lines_text = get_text(sorted_lines, '\n') | ||||
| 
 | ||||
|             if sorted_lines_text == region_text: | ||||
|                 LOG.info('Fixing line order of region "%s"', region.id) | ||||
|                 region.set_TextLine(sorted_lines) | ||||
| 
 | ||||
|     def _fix_words(self, line): | ||||
|         """Fix word order in a line""" | ||||
| 
 | ||||
|         words = line.get_Word() | ||||
|         line_text = get_text(line) | ||||
|         words_text = get_text(words, ' ') | ||||
|         if line_text != words_text: | ||||
|             # XXX Assumes left-to-right | ||||
|             sorted_words = sorted(words, key=lambda w: Polygon(polygon_from_points(w.get_Coords().points)).centroid.x) | ||||
|             sorted_words_text = get_text(sorted_words, ' ') | ||||
| 
 | ||||
|             if sorted_words_text == line_text: | ||||
|                 LOG.info('Fixing word order of line "%s"', line.id) | ||||
|                 line.set_Word(sorted_words) | ||||
| 
 | ||||
|     def _fix_glyphs(self, word): | ||||
|         """Fix glyph order in a word""" | ||||
| 
 | ||||
|         glyphs = word.get_Glyph() | ||||
|         word_text = get_text(word) | ||||
|         glyphs_text = get_text(glyphs, '') | ||||
|         if word_text != glyphs_text: | ||||
|             # XXX Assumes left-to-right | ||||
|             sorted_glyphs = sorted(glyphs, key=lambda g: Polygon(polygon_from_points(g.get_Coords().points)).centroid.x) | ||||
|             sorted_glyphs_text = get_text(sorted_glyphs, '') | ||||
| 
 | ||||
|             if sorted_glyphs_text == word_text: | ||||
|                 LOG.info('Fixing glyph order of word "%s"', word.id) | ||||
|                 word.set_Glyph(sorted_glyphs) | ||||
| 
 | ||||
|     def process(self): | ||||
|         for (n, input_file) in enumerate(self.input_files): | ||||
|             page_id = input_file.pageId or input_file.ID | ||||
|  | @ -78,18 +34,17 @@ class RepairInconsistencies(Processor): | |||
|             pcgts = page_from_file(self.workspace.download_file(input_file)) | ||||
|             page = pcgts.get_Page() | ||||
| 
 | ||||
| 
 | ||||
|             regions = page.get_TextRegion() | ||||
|             for region in regions: | ||||
|                 self._fix_lines(region) | ||||
|                 _fix_lines(region) | ||||
| 
 | ||||
|                 lines = region.get_TextLine() | ||||
|                 for line in lines: | ||||
|                     self._fix_words(line) | ||||
|                     _fix_words(line) | ||||
| 
 | ||||
|                     words = line.get_Word() | ||||
|                     for word in words: | ||||
|                         self._fix_glyphs(word) | ||||
|                         _fix_glyphs(word) | ||||
| 
 | ||||
|             file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp) | ||||
|             if file_id == input_file.ID: | ||||
|  | @ -103,7 +58,6 @@ class RepairInconsistencies(Processor): | |||
|                 content=to_xml(pcgts)) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| def get_text(thing, joiner=None): | ||||
|     """Get the text of the given thing, joining if necessary""" | ||||
| 
 | ||||
|  | @ -119,3 +73,51 @@ def get_text(thing, joiner=None): | |||
|     else: | ||||
|         text = _get_text_for_one(thing) | ||||
|     return text | ||||
| 
 | ||||
| 
 | ||||
| def _fix_words(line): | ||||
|     """Fix word order in a line""" | ||||
| 
 | ||||
|     words = line.get_Word() | ||||
|     line_text = get_text(line) | ||||
|     words_text = get_text(words, ' ') | ||||
|     if line_text != words_text: | ||||
|         # XXX Assumes left-to-right | ||||
|         sorted_words = sorted(words, key=lambda w: Polygon(polygon_from_points(w.get_Coords().points)).centroid.x) | ||||
|         sorted_words_text = get_text(sorted_words, ' ') | ||||
| 
 | ||||
|         if sorted_words_text == line_text: | ||||
|             LOG.info('Fixing word order of line "%s"', line.id) | ||||
|             line.set_Word(sorted_words) | ||||
| 
 | ||||
| 
 | ||||
| def _fix_glyphs(word): | ||||
|     """Fix glyph order in a word""" | ||||
| 
 | ||||
|     glyphs = word.get_Glyph() | ||||
|     word_text = get_text(word) | ||||
|     glyphs_text = get_text(glyphs, '') | ||||
|     if word_text != glyphs_text: | ||||
|         # XXX Assumes left-to-right | ||||
|         sorted_glyphs = sorted(glyphs, key=lambda g: Polygon(polygon_from_points(g.get_Coords().points)).centroid.x) | ||||
|         sorted_glyphs_text = get_text(sorted_glyphs, '') | ||||
| 
 | ||||
|         if sorted_glyphs_text == word_text: | ||||
|             LOG.info('Fixing glyph order of word "%s"', word.id) | ||||
|             word.set_Glyph(sorted_glyphs) | ||||
| 
 | ||||
| 
 | ||||
| def _fix_lines(region): | ||||
|     """Fix line order in a region""" | ||||
| 
 | ||||
|     lines = region.get_TextLine() | ||||
|     region_text = get_text(region) | ||||
|     lines_text = get_text(lines, '\n') | ||||
|     if region_text != lines_text: | ||||
|         # XXX Assumes top-to-bottom | ||||
|         sorted_lines = sorted(lines, key=lambda l: Polygon(polygon_from_points(l.get_Coords().points)).centroid.y) | ||||
|         sorted_lines_text = get_text(sorted_lines, '\n') | ||||
| 
 | ||||
|         if sorted_lines_text == region_text: | ||||
|             LOG.info('Fixing line order of region "%s"', region.id) | ||||
|             region.set_TextLine(sorted_lines) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue