From 7fde00d9117b1b74b65111c0ff3cd815da4c5851 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 18 May 2021 17:34:08 +0200 Subject: [PATCH] ReadingOrder may also contain UnorderedGroupIndexed --- qurator/dinglehopper/ocr_files.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qurator/dinglehopper/ocr_files.py b/qurator/dinglehopper/ocr_files.py index cd1a9bf..5271727 100644 --- a/qurator/dinglehopper/ocr_files.py +++ b/qurator/dinglehopper/ocr_files.py @@ -97,14 +97,14 @@ def extract_texts_from_reading_order_group(group, tree, nsmap, textequiv_level): ro_children = filter(lambda child: "index" in child.attrib.keys(), ro_children) ro_children = sorted(ro_children, key=lambda child: int(child.attrib["index"])) - elif ET.QName(group.tag).localname == "UnorderedGroup": + elif ET.QName(group.tag).localname in ["UnorderedGroup","UnorderedGroupIndexed"]: ro_children = list(group) else: raise NotImplementedError for ro_child in ro_children: - if ET.QName(ro_child.tag).localname in ["OrderedGroup", "OrderedGroupIndexed", "UnorderedGroup"]: + if ET.QName(ro_child.tag).localname in ["OrderedGroup", "OrderedGroupIndexed", "UnorderedGroup", "UnorderedGroupIndexed"]: regions.extend( extract_texts_from_reading_order_group( ro_child, tree, nsmap, textequiv_level