From f6a2c94520dcf79892278320b29e3906d4a5f4bb Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 16 Apr 2025 18:55:42 +0200 Subject: [PATCH] ocrd_cli: but do check for existing output files Co-authored-by: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> --- src/dinglehopper/ocrd_cli.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/dinglehopper/ocrd_cli.py b/src/dinglehopper/ocrd_cli.py index 90db7d1..dbf59be 100644 --- a/src/dinglehopper/ocrd_cli.py +++ b/src/dinglehopper/ocrd_cli.py @@ -59,8 +59,12 @@ class OcrdDinglehopperEvaluate(Processor): [".html", "text/html"], [".json", "application/json"], ]: + output_file_id = file_id + report_suffix + output_file = next(self.workspace.mets.find_files(ID=output_file_id), None) + if output_file and config.OCRD_EXISTING_OUTPUT != 'OVERWRITE': + raise FileExistsError(f"A file with ID=={output_file_id} already exists {output_file} and neither force nor ignore are set") self.workspace.add_file( - file_id=file_id + report_suffix, + file_id=output_file_id, file_grp=self.output_file_grp, page_id=page_id, mimetype=mimetype,