mirror of
https://github.com/qurator-spk/ocrd_repair_inconsistencies.git
synced 2025-06-09 19:59:59 +02:00
✨ Fix line order in regions
This commit is contained in:
parent
c7033a5d4d
commit
25437176d4
1 changed file with 16 additions and 0 deletions
|
@@ -42,6 +42,21 @@ class RepairInconsistencies(Processor):
|
|||
kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL]
|
||||
super(RepairInconsistencies, self).__init__(*args, **kwargs)
|
||||
|
||||
def _fix_lines(self, region):
    """Fix line order in a region.

    Compares the text of ``region`` with the newline-joined text of its
    TextLine children. When they differ, the lines are re-sorted by the
    vertical position of their outline's centroid; the new order is only
    written back to the region if it makes the joined line text equal to
    the region text. Otherwise the region is left untouched.

    :param region: object providing ``get_TextLine()``, ``set_TextLine()``
        and ``id`` (presumably a PAGE TextRegion -- confirm against caller).
    """
    text_lines = region.get_TextLine()
    expected_text = get_text(region)
    current_text = get_text(text_lines, '\n')

    if expected_text == current_text:
        # Line order already agrees with the region text: nothing to repair.
        return

    def centroid_y(text_line):
        # Vertical coordinate of the centroid of the line's outline polygon.
        outline = Polygon(polygon_from_points(text_line.get_Coords().points))
        return outline.centroid.y

    # XXX Assumes top-to-bottom
    reordered = sorted(text_lines, key=centroid_y)

    if get_text(reordered, '\n') != expected_text:
        # Sorting did not reproduce the region text; keep the original order.
        return

    LOG.info('Fixing line order of region "%s"', region.id)
    region.set_TextLine(reordered)
|
||||
|
||||
def _fix_words(self, line):
|
||||
"""Fix word order in a line"""
|
||||
|
||||
|
@@ -82,6 +97,7 @@ class RepairInconsistencies(Processor):
|
|||
|
||||
regions = page.get_TextRegion()
|
||||
for region in regions:
|
||||
self._fix_lines(region)
|
||||
|
||||
lines = region.get_TextLine()
|
||||
for line in lines:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue