diff --git a/ocrd_repair_inconsistencies/ocrd_repair_inconsistencies.py b/ocrd_repair_inconsistencies/ocrd_repair_inconsistencies.py index bc8bb2b..038922a 100644 --- a/ocrd_repair_inconsistencies/ocrd_repair_inconsistencies.py +++ b/ocrd_repair_inconsistencies/ocrd_repair_inconsistencies.py @@ -7,6 +7,7 @@ from ocrd import Processor from ocrd_modelfactory import page_from_file from ocrd_models.ocrd_page import ( TextRegionType, TextLineType, WordType, + MetadataItemType, LabelsType, LabelType, to_xml ) from ocrd_utils import ( @@ -34,7 +35,20 @@ class RepairInconsistencies(Processor): LOG.info("INPUT FILE %i / %s", n, page_id) pcgts = page_from_file(self.workspace.download_file(input_file)) page = pcgts.get_Page() - + + # add metadata about this operation and its runtime parameters: + metadata = pcgts.get_Metadata() # ensured by from_file() + metadata.add_MetadataItem( + MetadataItemType(type_="processingStep", + name=self.ocrd_tool['steps'][0], + value=TOOL, + Labels=[LabelsType( + externalModel="ocrd-tool", + externalId="parameters", + Label=[LabelType(type_=name, + value=self.parameter[name]) + for name in self.parameter.keys()])])) + regions = page.get_TextRegion() for region in regions: