diff --git a/qurator/dinglehopper/extracted_text.py b/qurator/dinglehopper/extracted_text.py
index a0be84a..916b123 100644
--- a/qurator/dinglehopper/extracted_text.py
+++ b/qurator/dinglehopper/extracted_text.py
@@ -241,7 +241,7 @@ def get_textequiv_unicode(text_segment, nsmap) -> str:
return ''
textequiv = get_first_textequiv(textequivs, segment_id)
- return textequiv.find('./page:Unicode', namespaces=nsmap).text
+ return textequiv.find('./page:Unicode', namespaces=nsmap).text or ''
def get_first_textequiv(textequivs, segment_id):
diff --git a/qurator/dinglehopper/tests/extracted_text_test.py b/qurator/dinglehopper/tests/extracted_text_test.py
index 504d2ad..2ce81cd 100644
--- a/qurator/dinglehopper/tests/extracted_text_test.py
+++ b/qurator/dinglehopper/tests/extracted_text_test.py
@@ -95,12 +95,13 @@ def test_textequiv(attributes, expected_index, expected_log, caplog):
caplog.set_level(logging.INFO)
xml = ""
ns = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15"
- text = [f"Text {i}" for i in range(len(attributes) + 1)]
+ text = ["Text {0}".format(i) for i in range(len(attributes) + 1)]
- equiv = [f"{text[i]}"
+ equiv = ["{1}".format(attr, text[i])
for i, attr in enumerate(attributes)]
- textline = f"{xml}{''.join(equiv)}"
+ textline = "{0}{2}"
+ textline = textline.format(xml, ns, ''.join(equiv))
root = ET.fromstring(textline)
result = ExtractedText.from_text_segment(root,