OCR-D processor: properly handle missing or non-downloaded GT/OCR files

Co-authored-by: Robert Sachunsky <38561704+bertsky@users.noreply.github.com>
pull/128/head
Konstantin Baierer 4 days ago committed by Mike Gerber
parent 8c1b6d65f5
commit c0aa82d188

@@ -27,14 +27,19 @@ class OcrdDinglehopperEvaluate(Processor):
metrics = self.parameter["metrics"]
textequiv_level = self.parameter["textequiv_level"]
try:
gt_file, ocr_file = input_files
assert gt_file, 'missing GT file'
assert ocr_file, 'missing OCR file'
assert gt_file.local_filename
assert ocr_file.local_filename
except (ValueError, AssertionError) as err:
self.logger.warning(f'Missing either GT file, OCR file or both: {err}') # TODO how to log which page?
# wrong number of inputs: let fail
gt_file, ocr_file = input_files
# missing on either side: skip (zip_input_files already warned)
if not gt_file or not ocr_file:
return
# missing download (i.e. OCRD_DOWNLOAD_INPUT=false):
if not gt_file.local_filename:
if config.OCRD_MISSING_INPUT == 'ABORT':
raise MissingInputFile(gt_file.fileGrp, gt_file.pageId, gt_file.mimetype)
return
if not ocr_file.local_filename:
if config.OCRD_MISSING_INPUT == 'ABORT':
raise MissingInputFile(ocr_file.fileGrp, ocr_file.pageId, ocr_file.mimetype)
return
page_id = gt_file.pageId

Loading…
Cancel
Save