mirror of
https://github.com/qurator-spk/ocrd_repair_inconsistencies.git
synced 2025-06-09 19:59:59 +02:00
✨ Fix line order in regions
This commit is contained in:
parent
c7033a5d4d
commit
25437176d4
1 changed file with 16 additions and 0 deletions
|
@@ -42,6 +42,21 @@ class RepairInconsistencies(Processor):
|
||||||
kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL]
|
kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL]
|
||||||
super(RepairInconsistencies, self).__init__(*args, **kwargs)
|
super(RepairInconsistencies, self).__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
def _fix_lines(self, region):
    """Reorder the text lines of a region when their order contradicts its text.

    The region's own text is compared with the newline-joined text of its
    lines. If the two differ, the lines are sorted by the vertical position
    of the centroid of their coordinate polygon. The sorted order is written
    back to the region only when it reproduces the region text exactly;
    otherwise the region is left untouched.

    :param region: text region whose lines are checked; it must offer
        ``get_TextLine()``, ``set_TextLine()`` and ``id``.
    :return: None. The region is modified in place when a fix is applied.
    """

    def centroid_y(text_line):
        # Vertical centre of the line outline, used as the sort key.
        outline = polygon_from_points(text_line.get_Coords().points)
        return Polygon(outline).centroid.y

    text_lines = region.get_TextLine()
    expected_text = get_text(region)
    if expected_text == get_text(text_lines, '\n'):
        # Line order already agrees with the region text.
        return

    # XXX Assumes top-to-bottom
    reordered = sorted(text_lines, key=centroid_y)
    if get_text(reordered, '\n') != expected_text:
        # Sorting did not reproduce the region text; keep the original order.
        return

    LOG.info('Fixing line order of region "%s"', region.id)
    region.set_TextLine(reordered)
|
||||||
|
|
||||||
def _fix_words(self, line):
|
def _fix_words(self, line):
|
||||||
"""Fix word order in a line"""
|
"""Fix word order in a line"""
|
||||||
|
|
||||||
|
@@ -82,6 +97,7 @@ class RepairInconsistencies(Processor):
|
||||||
|
|
||||||
regions = page.get_TextRegion()
|
regions = page.get_TextRegion()
|
||||||
for region in regions:
|
for region in regions:
|
||||||
|
self._fix_lines(region)
|
||||||
|
|
||||||
lines = region.get_TextLine()
|
lines = region.get_TextLine()
|
||||||
for line in lines:
|
for line in lines:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue