ALTO: Support more ALTO versions

master
Gerber, Mike 3 years ago
parent 937e7d74eb
commit 9b3db1cd1d

@@ -39,11 +39,14 @@ def alto_to_dict(alto, raise_errors=True):
value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().text()
elif localname == 'OCRProcessing':
value[localname] = TagGroup(tag, group).is_singleton().descend(raise_errors)
elif localname == 'Processing':
# TODO This enumerated descent is used more than once, DRY!
for n, e in enumerate(group):
value[f'{localname}{n}'] = alto_to_dict(e, raise_errors)
elif localname == 'ocrProcessingStep':
for n, e in enumerate(group):
value[f'{localname}{n}'] = alto_to_dict(e, raise_errors)
elif localname == 'preProcessingStep':
# TODO This enumerated descent is used more than once, DRY!
for n, e in enumerate(group):
value[f'{localname}{n}'] = alto_to_dict(e, raise_errors)
elif localname == 'processingDateTime':
@@ -76,6 +79,8 @@ def alto_to_dict(alto, raise_errors=True):
value['Page'].update(TagGroup(tag, group).subelement_counts())
elif localname == 'Styles':
pass
elif localname == 'Tags':
pass
else:
if raise_errors:
print(value)

@@ -231,6 +231,7 @@ def _to_dict(root, raise_errors):
"http://schema.ccs-gmbh.com/ALTO",
"http://www.loc.gov/standards/alto/",
"http://www.loc.gov/standards/alto/ns-v2#",
"http://www.loc.gov/standards/alto/ns-v4#",
]:
return alto_to_dict(root, raise_errors)
else:

Loading…
Cancel
Save