From c91c9b171416e07bc3bcb28e9928ab24aa1b6cbb Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Tue, 10 May 2022 14:27:39 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20ALTO:=20preProcessingStep/processin?= =?UTF-8?q?gAgency/sourceImageInformation=20etc.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/modstool/altotool.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/qurator/modstool/altotool.py b/qurator/modstool/altotool.py index 66fac8c..431aa1e 100755 --- a/qurator/modstool/altotool.py +++ b/qurator/modstool/altotool.py @@ -42,16 +42,32 @@ def alto_to_dict(alto, raise_errors=True): elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}ocrProcessingStep': for n, e in enumerate(group): value['ocrProcessingStep{}'.format(n)] = alto_to_dict(e, raise_errors) + elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}preProcessingStep': + # TODO This enumerated descent is used more than once, DRY! + for n, e in enumerate(group): + value['preProcessingStep{}'.format(n)] = alto_to_dict(e, raise_errors) elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}processingDateTime': value['processingDateTime'] = TagGroup(tag, group).is_singleton().has_no_attributes().text() elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}processingSoftware': value['processingSoftware'] = TagGroup(tag, group).is_singleton().descend(raise_errors) + elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}processingAgency': + value['processingAgency'] = TagGroup(tag, group).is_singleton().has_no_attributes().text() + elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}processingStepDescription': + value['processingStepDescription'] = TagGroup(tag, group).is_singleton().has_no_attributes().text() + elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}processingStepSettings': + value['processingStepSettings'] = TagGroup(tag, group).is_singleton().has_no_attributes().text() elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}softwareCreator': value['softwareCreator'] = TagGroup(tag, group).is_singleton().has_no_attributes().text() elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}softwareName': value['softwareName'] = TagGroup(tag, group).is_singleton().has_no_attributes().text() elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}softwareVersion': value['softwareVersion'] = TagGroup(tag, group).is_singleton().has_no_attributes().text() + + elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}sourceImageInformation': + value['sourceImageInformation'] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors) + elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}fileName': + value['fileName'] = TagGroup(tag, group).is_singleton().has_no_attributes().text() + elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Layout': value['Layout'] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors) elif tag == '{http://www.loc.gov/standards/alto/ns-v2#}Page':