mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-07 19:05:13 +02:00
OCR-D processor: properly handle missing or non-downloaded GT/OCR file
Co-authored-by: Robert Sachunsky <38561704+bertsky@users.noreply.github.com>
This commit is contained in:
parent
8c1b6d65f5
commit
c0aa82d188
1 changed file with 13 additions and 8 deletions
|
@ -27,14 +27,19 @@ class OcrdDinglehopperEvaluate(Processor):
|
||||||
metrics = self.parameter["metrics"]
|
metrics = self.parameter["metrics"]
|
||||||
textequiv_level = self.parameter["textequiv_level"]
|
textequiv_level = self.parameter["textequiv_level"]
|
||||||
|
|
||||||
try:
|
# wrong number of inputs: let fail
|
||||||
gt_file, ocr_file = input_files
|
gt_file, ocr_file = input_files
|
||||||
assert gt_file, 'missing GT file'
|
# missing on either side: skip (zip_input_files already warned)
|
||||||
assert ocr_file, 'missing OCR file'
|
if not gt_file or not ocr_file:
|
||||||
assert gt_file.local_filename
|
return
|
||||||
assert ocr_file.local_filename
|
# missing download (i.e. OCRD_DOWNLOAD_INPUT=false):
|
||||||
except (ValueError, AssertionError) as err:
|
if not gt_file.local_filename:
|
||||||
self.logger.warning(f'Missing either GT file, OCR file or both: {err}') # TODO how to log which page?
|
if config.OCRD_MISSING_INPUT == 'ABORT':
|
||||||
|
raise MissingInputFile(gt_file.fileGrp, gt_file.pageId, gt_file.mimetype)
|
||||||
|
return
|
||||||
|
if not ocr_file.local_filename:
|
||||||
|
if config.OCRD_MISSING_INPUT == 'ABORT':
|
||||||
|
raise MissingInputFile(ocr_file.fileGrp, ocr_file.pageId, ocr_file.mimetype)
|
||||||
return
|
return
|
||||||
|
|
||||||
page_id = gt_file.pageId
|
page_id = gt_file.pageId
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue