From e4e2777cb7727088f1183c832e0164e6bac119af Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Wed, 21 Oct 2020 17:59:44 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20dinglehopper:=20Do=20try=20to=20?= =?UTF-8?q?get=20text=20when=20no=20TextEquivs=20exist?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/dinglehopper/extracted_text.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/qurator/dinglehopper/extracted_text.py b/qurator/dinglehopper/extracted_text.py index 352c697..46c8fec 100644 --- a/qurator/dinglehopper/extracted_text.py +++ b/qurator/dinglehopper/extracted_text.py @@ -185,6 +185,10 @@ class ExtractedText: def get_textequiv_unicode(s): """Get the TextEquiv/Unicode text of the given PAGE text element""" textequivs = s.findall('./page:TextEquiv', namespaces=nsmap) + + if not textequivs: + return None + def get_index(te): index = te.attrib.get('index') try: