diff --git a/qurator/modstool/altotool.py b/qurator/modstool/altotool.py
index b38dddf..fee1f73 100755
--- a/qurator/modstool/altotool.py
+++ b/qurator/modstool/altotool.py
@@ -53,7 +53,10 @@ def alto_to_dict(alto, raise_errors=True):
             elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}softwareVersion':
                 value['softwareVersion'] = TagGroup(tag, group).is_singleton().has_no_attributes().text()
             elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Layout':
-                pass # TODO
+                value['Layout'] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors)
+            elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Page':
+                value['Page'] = TagGroup(tag, group).is_singleton().attributes()
+                # TODO subelements
             elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Styles':
                 pass
             else:
diff --git a/qurator/modstool/lib.py b/qurator/modstool/lib.py
index b24b698..c4ff8b0 100644
--- a/qurator/modstool/lib.py
+++ b/qurator/modstool/lib.py
@@ -181,6 +181,19 @@ class TagGroup:
             value[sub_tag] = s
         return value
 
+    def attributes(self):
+        """
+        Return a merged dict of all attributes of the tag group.
+
+        Probably most useful if used on a singleton, for example:
+
+        value['Page'] = TagGroup(tag, group).is_singleton().attributes()
+        """
+        attrib = {}
+        for e in self.group:
+            attrib.update(e.attrib)
+        return attrib
+
 
 def sorted_groupby(iterable, key=None):
     """