From c9737683b130835019a5f8aef88f9d6b173c9d74 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike"
Date: Fri, 6 May 2022 19:59:19 +0200
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20ALTO:=20Add=20Layout/Page's=20attri?=
 =?UTF-8?q?bute=20values?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---

Notes (review):

  qurator/modstool/altotool.py, alto_to_dict():
    * The ALTO ns-v2 "Layout" branch no longer does nothing. It now stores,
      under value['Layout'], the result of
      TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors).
    * A new branch for the ALTO ns-v2 "Page" tag stores, under value['Page'],
      the result of TagGroup(tag, group).is_singleton().attributes().
      Only the attributes are captured; Page subelements are still a TODO.

  qurator/modstool/lib.py, TagGroup.attributes():
    * Builds a fresh dict and calls update(e.attrib) once per element of
      self.group, in group order, then returns that dict.
    * Consequence of using update(): if the group holds more than one
      element and two of them carry the same attribute name, the value of
      the later element replaces the earlier one. Calling is_singleton()
      first, as the docstring example does, presumably rules this out.

  Assumptions to confirm (not visible in this patch):
    * is_singleton() and has_no_attributes() are defined elsewhere in
      lib.py; they presumably validate the group and return the TagGroup so
      that calls can be chained -- confirm against lib.py.
    * e.attrib is presumably the attribute mapping of an ElementTree/lxml
      element -- confirm the element type held in self.group.
    * The copy of this patch under review had lost its line breaks and
      indentation. The whitespace inside the hunks below was reconstructed
      by hand and must be checked against the target tree before applying.

 qurator/modstool/altotool.py |  5 ++++-
 qurator/modstool/lib.py      | 13 +++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/qurator/modstool/altotool.py b/qurator/modstool/altotool.py
index b38dddf..fee1f73 100755
--- a/qurator/modstool/altotool.py
+++ b/qurator/modstool/altotool.py
@@ -53,7 +53,10 @@ def alto_to_dict(alto, raise_errors=True):
             elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}softwareVersion':
                 value['softwareVersion'] = TagGroup(tag, group).is_singleton().has_no_attributes().text()
             elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Layout':
-                pass # TODO
+                value['Layout'] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors)
+            elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Page':
+                value['Page'] = TagGroup(tag, group).is_singleton().attributes()
+                # TODO subelements
             elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Styles':
                 pass
             else:
diff --git a/qurator/modstool/lib.py b/qurator/modstool/lib.py
index b24b698..c4ff8b0 100644
--- a/qurator/modstool/lib.py
+++ b/qurator/modstool/lib.py
@@ -181,6 +181,19 @@ class TagGroup:
             value[sub_tag] = s
         return value
 
+    def attributes(self):
+        """
+        Return a merged dict of all attributes of the tag group.
+
+        Probably most useful if used on a singleton, for example:
+
+          value['Page'] = TagGroup(tag, group).is_singleton().attributes()
+        """
+        attrib = {}
+        for e in self.group:
+            attrib.update(e.attrib)
+        return attrib
+
 
 def sorted_groupby(iterable, key=None):
     """