mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-06-09 11:49:55 +02:00
✨ ALTO: Support more ALTO versions
This commit is contained in:
parent
937e7d74eb
commit
9b3db1cd1d
2 changed files with 7 additions and 1 deletion
|
@@ -39,11 +39,14 @@ def alto_to_dict(alto, raise_errors=True):
|
||||||
value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().text()
|
value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().text()
|
||||||
elif localname == 'OCRProcessing':
|
elif localname == 'OCRProcessing':
|
||||||
value[localname] = TagGroup(tag, group).is_singleton().descend(raise_errors)
|
value[localname] = TagGroup(tag, group).is_singleton().descend(raise_errors)
|
||||||
|
elif localname == 'Processing':
|
||||||
|
# TODO This enumerated descent is used more than once, DRY!
|
||||||
|
for n, e in enumerate(group):
|
||||||
|
value[f'{localname}{n}'] = alto_to_dict(e, raise_errors)
|
||||||
elif localname == 'ocrProcessingStep':
|
elif localname == 'ocrProcessingStep':
|
||||||
for n, e in enumerate(group):
|
for n, e in enumerate(group):
|
||||||
value[f'{localname}{n}'] = alto_to_dict(e, raise_errors)
|
value[f'{localname}{n}'] = alto_to_dict(e, raise_errors)
|
||||||
elif localname == 'preProcessingStep':
|
elif localname == 'preProcessingStep':
|
||||||
# TODO This enumerated descent is used more than once, DRY!
|
|
||||||
for n, e in enumerate(group):
|
for n, e in enumerate(group):
|
||||||
value[f'{localname}{n}'] = alto_to_dict(e, raise_errors)
|
value[f'{localname}{n}'] = alto_to_dict(e, raise_errors)
|
||||||
elif localname == 'processingDateTime':
|
elif localname == 'processingDateTime':
|
||||||
|
@@ -76,6 +79,8 @@ def alto_to_dict(alto, raise_errors=True):
|
||||||
value['Page'].update(TagGroup(tag, group).subelement_counts())
|
value['Page'].update(TagGroup(tag, group).subelement_counts())
|
||||||
elif localname == 'Styles':
|
elif localname == 'Styles':
|
||||||
pass
|
pass
|
||||||
|
elif localname == 'Tags':
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
if raise_errors:
|
if raise_errors:
|
||||||
print(value)
|
print(value)
|
||||||
|
|
|
@@ -231,6 +231,7 @@ def _to_dict(root, raise_errors):
|
||||||
"http://schema.ccs-gmbh.com/ALTO",
|
"http://schema.ccs-gmbh.com/ALTO",
|
||||||
"http://www.loc.gov/standards/alto/",
|
"http://www.loc.gov/standards/alto/",
|
||||||
"http://www.loc.gov/standards/alto/ns-v2#",
|
"http://www.loc.gov/standards/alto/ns-v2#",
|
||||||
|
"http://www.loc.gov/standards/alto/ns-v4#",
|
||||||
]:
|
]:
|
||||||
return alto_to_dict(root, raise_errors)
|
return alto_to_dict(root, raise_errors)
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue