mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-06-08 11:20:07 +02:00
✨ ALTO: Count Layout/Page/* elements
This commit is contained in:
parent
c9737683b1
commit
1c62085612
2 changed files with 13 additions and 2 deletions
|
@@ -55,8 +55,9 @@ def alto_to_dict(alto, raise_errors=True):
|
||||||
elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Layout':
|
elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Layout':
|
||||||
value['Layout'] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors)
|
value['Layout'] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors)
|
||||||
elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Page':
|
elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Page':
|
||||||
value['Page'] = TagGroup(tag, group).is_singleton().attributes()
|
value['Page'] = {}
|
||||||
# TODO subelements
|
value['Page'].update(TagGroup(tag, group).is_singleton().attributes())
|
||||||
|
value['Page'].update(TagGroup(tag, group).subelement_counts())
|
||||||
elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Styles':
|
elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Styles':
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
|
|
|
@@ -194,6 +194,16 @@ class TagGroup:
|
||||||
attrib.update(e.attrib)
|
attrib.update(e.attrib)
|
||||||
return attrib
|
return attrib
|
||||||
|
|
||||||
|
def subelement_counts(self):
    """
    Count the elements in this group's subtrees by local tag name.

    Every element in ``self.group`` is walked with ``iter()``, which starts
    with the element itself, so the group's own tag is included in the
    counts alongside all of its descendants.

    Returns:
        dict: Maps ``"<localname>-count"`` to the number of elements with
        that local name, e.g. ``{"Page-count": 1, "TextBlock-count": 2}``.
        Empty if the group has no elements.
    """
    counts = {}
    for e in self.group:
        for x in e.iter():
            # An unfiltered iter() also yields comments and processing
            # instructions; their .tag is a factory function, not a name,
            # so they cannot be counted by tag and must be skipped.
            if not isinstance(x.tag, str):
                continue
            # Drop the "{namespace}" prefix of a Clark-notation tag; a tag
            # without a namespace is used unchanged.
            tag = x.tag.rpartition('}')[2]
            key = f"{tag}-count"
            counts[key] = counts.get(key, 0) + 1
    return counts
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def sorted_groupby(iterable, key=None):
|
def sorted_groupby(iterable, key=None):
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue