diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index 0624d9b..222bff3 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -106,6 +106,11 @@ class CalamariRecognize(Processor): # a strict hierarchy of lines > words > glyphs. def unwanted(c): + """ + Define unwanted characters. + + Words consisting only of these characters (e.g. whitespace) are not considered words. + """ return c == " " word_no = 0