ALTO: Count Layout/Page/* elements

master
Gerber, Mike 3 years ago
parent c9737683b1
commit 1c62085612

@ -55,8 +55,9 @@ def alto_to_dict(alto, raise_errors=True):
elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Layout': elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Layout':
value['Layout'] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors) value['Layout'] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors)
elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Page': elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Page':
value['Page'] = TagGroup(tag, group).is_singleton().attributes() value['Page'] = {}
# TODO subelements value['Page'].update(TagGroup(tag, group).is_singleton().attributes())
value['Page'].update(TagGroup(tag, group).subelement_counts())
elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Styles': elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Styles':
pass pass
else: else:

@ -194,6 +194,16 @@ class TagGroup:
attrib.update(e.attrib) attrib.update(e.attrib)
return attrib return attrib
def subelement_counts(self):
    """Count the elements found in this group's subtrees, per local tag name.

    Every element in ``self.group`` is walked with ``iter()``, which visits
    the element itself first and then all of its descendants. Each visited
    element adds one to the entry ``"<localname>-count"``, where
    ``<localname>`` is the tag with any ``{namespace}`` prefix removed.

    Returns:
        A plain dict mapping ``"<localname>-count"`` to the number of
        elements with that local name; empty if the group is empty.
    """
    counts = {}
    for e in self.group:
        for x in e.iter():
            # An unfiltered iter() also yields comments and processing
            # instructions; their tag is not a string and has no local
            # name, so they are skipped rather than raising an error.
            if not isinstance(x.tag, str):
                continue
            # Tags use Clark notation ("{namespace}local"); keep the part
            # after the closing brace, or the whole tag if un-namespaced.
            localname = x.tag.rpartition("}")[2]
            key = f"{localname}-count"
            counts[key] = counts.get(key, 0) + 1
    return counts
def sorted_groupby(iterable, key=None): def sorted_groupby(iterable, key=None):
""" """

Loading…
Cancel
Save