From 4f28cd905acf46b264a96960015131126a25d7d7 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 25 Oct 2019 18:08:31 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=B9=20sbb=5Ftextline=5Fdetector:=20Do?= =?UTF-8?q?=20not=20create=20empty/space-only=20TextEquivs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ocrd_tesserocr or ocrd_cis complain about already existing text if empty/space-only TextEquiv elements exist after segmentation. Also, it does not make sense to create them in a segmentation step. Fix by removing the code generating the elements. --- qurator/sbb_textline_detector/main.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 86ba3c1..5aca833 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -1826,10 +1826,6 @@ class textlineerkenner: coord = ET.SubElement(textline, 'Coords') - texteq=ET.SubElement(textline, 'TextEquiv') - - uni=ET.SubElement(texteq, 'Unicode') - uni.text = ' ' #points = ET.SubElement(coord, 'Points') @@ -1859,10 +1855,6 @@ class textlineerkenner: #print(points_co) coord.set('points',points_co) - texteqreg=ET.SubElement(textregion, 'TextEquiv') - - unireg=ET.SubElement(texteqreg, 'Unicode') - unireg.text = ' '