From 909632493b74463b5b336129d34ae9a285143980 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Mon, 3 Feb 2020 17:37:19 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20Add=20future=20TODOs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ocrd_calamari/recognize.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index 222bff3..bde4218 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -116,6 +116,7 @@ class CalamariRecognize(Processor): word_no = 0 i = 0 for word_text in uniseg.wordbreak.words(prediction.sentence): + # XXX Re-use word segmentation from dinglehopper, i.e. support private use characters word_length = len(word_text) do_not_include = all(unwanted(c) for c in word_text) @@ -126,6 +127,7 @@ class CalamariRecognize(Processor): polygon = polygon_from_x0y0x1y1([word_start, 0, word_end, line_image.height]) points = points_from_polygon(coordinates_for_segment(polygon, None, line_coords)) + # XXX Crop to line polygon? word = WordType(id='%s_word%04d' % (line.id, word_no), Coords=CoordsType(points)) word.add_TextEquiv(TextEquivType(Unicode=word_text))