From 3149e1d9e062ade7d39da916459e9c01e7a965c0 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Mon, 3 Feb 2020 15:33:38 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=9D=20unwanted()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ocrd_calamari/recognize.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index 0624d9b..222bff3 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -106,6 +106,11 @@ class CalamariRecognize(Processor): # a strict hierarchy of lines > words > glyphs. def unwanted(c): + """ + Define unwanted characters + + Words consisting only of these characters (e.g. whitespace) are not considered words. + """ return c == " " word_no = 0