diff --git a/src/dinglehopper/ocrd-tool.json b/src/dinglehopper/ocrd-tool.json index f4572c7..d9b3f1f 100644 --- a/src/dinglehopper/ocrd-tool.json +++ b/src/dinglehopper/ocrd-tool.json @@ -29,6 +29,11 @@ "enum": ["region", "line"], "default": "region", "description": "PAGE XML hierarchy level to extract the text from" + }, + "plain_encoding": { + "type": "string", + "default": "autodetect", + "description": "Encoding (e.g. \"utf-8\") of plain text files" } } } diff --git a/src/dinglehopper/ocrd_cli.py b/src/dinglehopper/ocrd_cli.py index 4da4960..2023e03 100644 --- a/src/dinglehopper/ocrd_cli.py +++ b/src/dinglehopper/ocrd_cli.py @@ -36,6 +36,7 @@ class OcrdDinglehopperEvaluate(Processor): metrics = self.parameter["metrics"] textequiv_level = self.parameter["textequiv_level"] + plain_encoding = self.parameter["plain_encoding"] gt_grp, ocr_grp = self.input_file_grp.split(",") input_file_tuples = self.zip_input_files(on_error="abort") @@ -63,6 +64,7 @@ class OcrdDinglehopperEvaluate(Processor): report_prefix, metrics=metrics, textequiv_level=textequiv_level, + plain_encoding=plain_encoding, ) # Add reports to the workspace