diff --git a/src/dinglehopper/ocrd_cli.py b/src/dinglehopper/ocrd_cli.py index 9696ff9..52da817 100644 --- a/src/dinglehopper/ocrd_cli.py +++ b/src/dinglehopper/ocrd_cli.py @@ -27,14 +27,19 @@ class OcrdDinglehopperEvaluate(Processor): metrics = self.parameter["metrics"] textequiv_level = self.parameter["textequiv_level"] - try: - gt_file, ocr_file = input_files - assert gt_file, 'missing GT file' - assert ocr_file, 'missing OCR file' - assert gt_file.local_filename - assert ocr_file.local_filename - except (ValueError, AssertionError) as err: - self.logger.warning(f'Missing either GT file, OCR file or both: {err}') # TODO how to log which page? + # wrong number of inputs: let fail + gt_file, ocr_file = input_files + # missing on either side: skip (zip_input_files already warned) + if not gt_file or not ocr_file: + return + # missing download (i.e. OCRD_DOWNLOAD_INPUT=false): + if not gt_file.local_filename: + if config.OCRD_MISSING_INPUT == 'ABORT': + raise MissingInputFile(gt_file.fileGrp, gt_file.pageId, gt_file.mimetype) + return + if not ocr_file.local_filename: + if config.OCRD_MISSING_INPUT == 'ABORT': + raise MissingInputFile(ocr_file.fileGrp, ocr_file.pageId, ocr_file.mimetype) return page_id = gt_file.pageId