From b41278f99b5e789b0d400294dfeb029d38f81c15 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 26 Nov 2019 12:43:52 +0000 Subject: [PATCH 1/2] improve description of behaviour in README clarify: - what levels are re-ordered (`ReadingOrder` unaffected/unrelated) - textual consistency is only the trigger, not the driver - coordinate-based order respects annotation --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index ba20b4e..e6e8071 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,12 @@ # ocrd_repair_inconsistencies -Automatically fix PAGE-XML order inconsistencies in regions, lines and words. -Children elements are only reordered if reordering by coordinates -top-to-bottom/left-to-right fixes the appropriately concatenated `TextEquiv` -texts of the children to match the parent's `TextEquiv` text. This processor -does not change reading order, just the order of the XML elements in the file. +Automatically re-order lines, words and glyphs to become textually consistent with their parents. + +PAGE-XML elements with textual annotation are re-ordered by their centroid coordinates +in top-to-bottom/left-to-right fashion iff such re-ordering fixes the inconsistency +between their appropriately concatenated `TextEquiv` texts and their parent's `TextEquiv` text. + +This processor does not affect `ReadingOrder` between regions, just the order of the XML elements below the region level, and only if not contradicting the annotated `textLineOrder`/`readingDirection`. We wrote this as a one-shot script to fix some files. Use with caution. 
From f9125b59d87b387a95e1ebc74cc509f0d380be4a Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 26 Nov 2019 12:49:30 +0000 Subject: [PATCH 2/2] better tool description, fix steps, no underscores --- ocrd_repair_inconsistencies/ocrd-tool.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ocrd_repair_inconsistencies/ocrd-tool.json b/ocrd_repair_inconsistencies/ocrd-tool.json index c77f431..e5a2148 100644 --- a/ocrd_repair_inconsistencies/ocrd-tool.json +++ b/ocrd_repair_inconsistencies/ocrd-tool.json @@ -1,11 +1,11 @@ { "tools": { - "ocrd_repair_inconsistencies": { - "executable": "ocrd_repair_inconsistencies", + "ocrd-repair-inconsistencies": { + "executable": "ocrd-repair-inconsistencies", "categories": [ "Layout analysis" ], - "description": "Repair glyph/word/line order inconsistencies", + "description": "Re-order glyphs/words/lines top-down-left-right when textually inconsistent with their parents", "input_file_grp": [ "OCR-D-SEG-BLOCK" ], @@ -13,9 +13,9 @@ "OCR-D-SEG-BLOCK-FIXED" ], "steps": [ - "layout/segmentation/region", "layout/segmentation/line", - "layout/segmentation/words" + "layout/segmentation/word", + "layout/segmentation/glyph" ] } }