From 5c09883c82372911b46c6dadfd03304abf6be261 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 25 Jun 2020 20:27:15 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Support=20TextRegions=20nested=20in?= =?UTF-8?q?=20Table/GraphicRegions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ocrd_repair_inconsistencies/ocrd_repair_inconsistencies.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ocrd_repair_inconsistencies/ocrd_repair_inconsistencies.py b/ocrd_repair_inconsistencies/ocrd_repair_inconsistencies.py index 3e47b51..82500aa 100644 --- a/ocrd_repair_inconsistencies/ocrd_repair_inconsistencies.py +++ b/ocrd_repair_inconsistencies/ocrd_repair_inconsistencies.py @@ -48,8 +48,11 @@ class RepairInconsistencies(Processor): Label=[LabelType(type_=name, value=self.parameter[name]) for name in self.parameter.keys()])])) - - regions = page.get_TextRegion() + + regions = [] + regions.extend(page.get_TextRegion()) + for special_region in page.get_TableRegion() + page.get_GraphicRegion(): + regions.extend(special_region.get_TextRegion()) for region in regions: textLineOrder = 'top-to-bottom'