From 74e0ac18ed0a59607ca8d567203c9a332884a27e Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 19 Nov 2020 16:00:28 +0100 Subject: [PATCH] ocrd cli: use core-provided zip_input_files method --- qurator/dinglehopper/ocrd_cli.py | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/qurator/dinglehopper/ocrd_cli.py b/qurator/dinglehopper/ocrd_cli.py index 008b70c..adfbbab 100644 --- a/qurator/dinglehopper/ocrd_cli.py +++ b/qurator/dinglehopper/ocrd_cli.py @@ -34,7 +34,7 @@ class OcrdDinglehopperEvaluate(Processor): textequiv_level = self.parameter["textequiv_level"] gt_grp, ocr_grp = self.input_file_grp.split(",") - input_file_tuples = self._zip_input_files([gt_grp, ocr_grp]) + input_file_tuples = self.zip_input_files(on_error='abort') for n, (gt_file, ocr_file) in enumerate(input_file_tuples): if not gt_file or not ocr_file: # file/page was not found in this group @@ -77,31 +77,5 @@ class OcrdDinglehopperEvaluate(Processor): # Clear cache between files levenshtein_matrix_cache_clear() - def _zip_input_files(self, input_file_grps): - log = getLogger("processor.OcrdDinglehopperEvaluate") - input_file_tuples = list() - for page_id in ( - [self.page_id] if self.page_id else self.workspace.mets.physical_pages - ): - ifiles = list() - for input_file_grp in input_file_grps: - log.debug( - "Adding input file group %s to page %s", input_file_grp, page_id - ) - files = self.workspace.mets.find_all_files( - pageId=page_id, fileGrp=input_file_grp - ) - if not files: - log.error( - 'Found no page "%s" in file group %s', page_id, input_file_grp - ) - ifiles.append(None) - else: - ifiles.append(files[0]) - if ifiles[0]: - input_file_tuples.append(tuple(ifiles)) - return input_file_tuples - - if __name__ == "__main__": ocrd_dinglehopper()