From 4f28cd905acf46b264a96960015131126a25d7d7 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike" <mike.gerber@sbb.spk-berlin.de>
Date: Fri, 25 Oct 2019 18:08:31 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=B9=20sbb=5Ftextline=5Fdetector:=20Do?=
 =?UTF-8?q?=20not=20create=20empty/space-only=20TextEquivs?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ocrd_tesserocr or ocrd_cis complains about already existing text if
empty or space-only TextEquiv elements exist after segmentation. Also,
it does not make sense to create them in a segmentation step.

Fix this by removing the code that generates these elements.
---
 qurator/sbb_textline_detector/main.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py
index 86ba3c1..5aca833 100644
--- a/qurator/sbb_textline_detector/main.py
+++ b/qurator/sbb_textline_detector/main.py
@@ -1826,10 +1826,6 @@ class textlineerkenner:
     
                     coord = ET.SubElement(textline, 'Coords')
     
-                    texteq=ET.SubElement(textline, 'TextEquiv')
-    
-                    uni=ET.SubElement(texteq, 'Unicode')
-                    uni.text = ' ' 
     
                     #points = ET.SubElement(coord, 'Points') 
     
@@ -1859,10 +1855,6 @@ class textlineerkenner:
                     #print(points_co)
                     coord.set('points',points_co)
                     
-                texteqreg=ET.SubElement(textregion, 'TextEquiv')
-    
-                unireg=ET.SubElement(texteqreg, 'Unicode')
-                unireg.text = ' '