From 0f9c94e7dc4f4577ec1465a1cb0613d310941728 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Mon, 3 Feb 2020 17:40:45 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Start=20with=20TextEquiv=20index?= =?UTF-8?q?=3D1=20to=20adhere=20to=20OCR-D=20PAGE=20conventions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://ocr-d.github.io/page#multiple-textequivs --- ocrd_calamari/recognize.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index bde4218..4c27e5e 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -142,10 +142,12 @@ class CalamariRecognize(Processor): glyph = GlyphType(id='%s_glyph%04d' % (word.id, glyph_no), Coords=CoordsType(points)) chars = sorted(p.chars, key=lambda k: k.probability, reverse=True) - for index, char in enumerate(chars): + char_index = 1 # Must start with 1, see https://ocr-d.github.io/page#multiple-textequivs + for char in chars: if char.char: - glyph.add_TextEquiv(TextEquivType(Unicode=char.char, index=index, conf=char.probability)) - # XXX Note that omission probabilities are not normalized?! + glyph.add_TextEquiv(TextEquivType(Unicode=char.char, index=char_index, conf=char.probability)) + char_index += 1 + # XXX Note that omission probabilities are not normalized?! word.add_Glyph(glyph)