diff --git a/src/eynollah/ocrd-tool.json b/src/eynollah/ocrd-tool.json index ce15206..af5e03f 100644 --- a/src/eynollah/ocrd-tool.json +++ b/src/eynollah/ocrd-tool.json @@ -74,6 +74,11 @@ "type": "boolean", "default": false, "description": "ignore the special role of headings during reading order detection" + }, + "reading_order_machine_based": { + "type": "boolean", + "default": false, + "description": "use data-driven (rather than rule-based) reading order detection" } }, "resources": [ diff --git a/src/eynollah/processor.py b/src/eynollah/processor.py index a53fede..c2922c1 100644 --- a/src/eynollah/processor.py +++ b/src/eynollah/processor.py @@ -24,6 +24,7 @@ class EynollahProcessor(Processor): allow_enhancement=self.parameter['allow_enhancement'], curved_line=self.parameter['curved_line'], right2left=self.parameter['right_to_left'], + reading_order_machine_based=self.parameter['reading_order_machine_based'], ignore_page_extraction=self.parameter['ignore_page_extraction'], light_version=self.parameter['light_version'], textline_light=self.parameter['textline_light'], @@ -57,6 +58,8 @@ class EynollahProcessor(Processor): - If ``ignore_page_extraction``, then attempt no cropping of the page. - If ``curved_line``, then compute contour polygons for text lines instead of simple bounding boxes. + - If ``reading_order_machine_based``, then detect reading order via + a data-driven model instead of geometrical heuristics. Produce a new output file by serialising the resulting hierarchy. """