Merge pull request #171 from bertsky/ocrd-machine-based-ro

OCR-D processor: expose reading_order_machine_based
This commit is contained in:
Clemens Neudecker 2025-08-15 18:40:13 +02:00 committed by GitHub
commit a2359ea4c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 8 additions and 0 deletions

View file

@@ -74,6 +74,11 @@
"type": "boolean",
"default": false,
"description": "ignore the special role of headings during reading order detection"
},
"reading_order_machine_based": {
"type": "boolean",
"default": false,
"description": "use data-driven (rather than rule-based) reading order detection"
}
},
"resources": [

View file

@@ -24,6 +24,7 @@ class EynollahProcessor(Processor):
allow_enhancement=self.parameter['allow_enhancement'],
curved_line=self.parameter['curved_line'],
right2left=self.parameter['right_to_left'],
reading_order_machine_based=self.parameter['reading_order_machine_based'],
ignore_page_extraction=self.parameter['ignore_page_extraction'],
light_version=self.parameter['light_version'],
textline_light=self.parameter['textline_light'],
@@ -57,6 +58,8 @@ class EynollahProcessor(Processor):
- If ``ignore_page_extraction``, then attempt no cropping of the page.
- If ``curved_line``, then compute contour polygons for text lines
instead of simple bounding boxes.
- If ``reading_order_machine_based``, then detect reading order via
a data-driven model instead of geometrical heuristics.
Produce a new output file by serialising the resulting hierarchy.
"""