Count sub-elements by tag name and add a first ALTO test.

qurator/modstool/lib.py
    In the TagGroup counting loop, ET.QName() now receives x.tag instead of
    the element x when the "<localname>-count" keys are built.

qurator/modstool/tests/test_alto.py (new file)
    dict_fromstring() parses an XML string and passes it through
    alto_to_dict() and flatten(). test_Page_counts() checks the element
    counts reported below Layout/Page.

NOTE(review): this copy of the patch had lost its line breaks, and the 20
fixture lines inside the triple-quoted string were empty (the markup appears
to have been stripped). The fixture below is a reconstruction: the smallest
tree that yields the asserted counts (1 TextBlock, 3 TextLine, 6 String) in
exactly the 20 lines required by the "+1,39" hunk header. The namespace URI
and the absence of attributes are assumptions - confirm against the upstream
test file. The indentation of the lib.py context lines is likewise restored
on the assumption that they sit in a method body. The "index" blob hashes
were left untouched and will not match reconstructed content.

diff --git a/qurator/modstool/lib.py b/qurator/modstool/lib.py
index 9f01be8..5ebf0ac 100644
--- a/qurator/modstool/lib.py
+++ b/qurator/modstool/lib.py
@@ -198,7 +198,7 @@ class TagGroup:
         counts = {}
         for e in self.group:
             for x in e.iter():
-                tag = ET.QName(x).localname
+                tag = ET.QName(x.tag).localname
                 key = f"{tag}-count"
                 counts[key] = counts.get(key, 0) + 1
         return counts
diff --git a/qurator/modstool/tests/test_alto.py b/qurator/modstool/tests/test_alto.py
new file mode 100644
index 0000000..bc79d1d
--- /dev/null
+++ b/qurator/modstool/tests/test_alto.py
@@ -0,0 +1,39 @@
+import xml.etree.ElementTree as ET
+
+
+from qurator.modstool.altotool import alto_to_dict
+from qurator.modstool.lib import flatten
+
+
+def dict_fromstring(x):
+    return flatten(alto_to_dict(ET.fromstring(x)))
+
+def test_Page_counts():
+    """
+    Elements below Layout/Page should be counted
+    """
+    d = dict_fromstring("""
+    <alto xmlns="http://www.loc.gov/standards/alto/ns-v2#">
+      <Layout>
+        <Page>
+          <TextBlock>
+            <TextLine>
+              <String/>
+              <String/>
+            </TextLine>
+            <TextLine>
+              <String/>
+              <String/>
+            </TextLine>
+            <TextLine>
+              <String/>
+              <String/>
+            </TextLine>
+          </TextBlock>
+        </Page>
+      </Layout>
+    </alto>
+    """)
+    assert d['Layout_Page_TextBlock-count'] == 1
+    assert d['Layout_Page_TextLine-count'] == 3
+    assert d['Layout_Page_String-count'] == 6