diff --git a/src/dinglehopper/ocrd-tool.json b/src/dinglehopper/ocrd-tool.json index 43795e1..ae7c9bb 100644 --- a/src/dinglehopper/ocrd-tool.json +++ b/src/dinglehopper/ocrd-tool.json @@ -25,6 +25,11 @@ "enum": ["region", "line"], "default": "region", "description": "PAGE XML hierarchy level to extract the text from" + }, + "plain_encoding": { + "type": "string", + "default": "autodetect", + "description": "Encoding (e.g. \"utf-8\") of plain text files" } } } diff --git a/src/dinglehopper/ocrd_cli.py b/src/dinglehopper/ocrd_cli.py index fa4747f..2d7da8e 100644 --- a/src/dinglehopper/ocrd_cli.py +++ b/src/dinglehopper/ocrd_cli.py @@ -26,6 +26,7 @@ class OcrdDinglehopperEvaluate(Processor): assert self.parameter metrics = self.parameter["metrics"] textequiv_level = self.parameter["textequiv_level"] + plain_encoding = self.parameter["plain_encoding"] # wrong number of inputs: let fail gt_file, ocr_file = input_files @@ -52,6 +53,7 @@ class OcrdDinglehopperEvaluate(Processor): self.output_file_grp, metrics=metrics, textequiv_level=textequiv_level, + plain_encoding=plain_encoding, ) # Add reports to the workspace