From 9e9163e8520000a281ff38def183a249eb3228df Mon Sep 17 00:00:00 2001
From: wrznr
Date: Tue, 10 Dec 2019 16:55:43 +0100
Subject: [PATCH 1/2] Simplify the iteration over files in the input file group

---
 qurator/sbb_textline_detector/ocrd_cli.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/qurator/sbb_textline_detector/ocrd_cli.py b/qurator/sbb_textline_detector/ocrd_cli.py
index 8aa13fe..a698a1a 100644
--- a/qurator/sbb_textline_detector/ocrd_cli.py
+++ b/qurator/sbb_textline_detector/ocrd_cli.py
@@ -51,8 +51,7 @@ class OcrdSbbTextlineDetectorRecognize(Processor):
         return image_file
 
     def process(self):
-        for n, page_id in enumerate(self.workspace.mets.physical_pages):
-            input_file = self.workspace.mets.find_files(fileGrp=self.input_file_grp, pageId=page_id)[0]
+        for (n, input_file) in enumerate(self.input_files):
             log.info("INPUT FILE %i / %s", n, input_file)
 
             file_id = self._make_file_id(input_file, self.input_file_grp, n)

From 4fc57d7756f809e52e189b51d6b585b235d0ce7f Mon Sep 17 00:00:00 2001
From: wrznr
Date: Tue, 10 Dec 2019 16:59:45 +0100
Subject: [PATCH 2/2] Assign page id

---
 qurator/sbb_textline_detector/ocrd_cli.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/qurator/sbb_textline_detector/ocrd_cli.py b/qurator/sbb_textline_detector/ocrd_cli.py
index a698a1a..ad9c3ac 100644
--- a/qurator/sbb_textline_detector/ocrd_cli.py
+++ b/qurator/sbb_textline_detector/ocrd_cli.py
@@ -52,6 +52,7 @@ class OcrdSbbTextlineDetectorRecognize(Processor):
 
     def process(self):
         for (n, input_file) in enumerate(self.input_files):
+            page_id = input_file.pageId or input_file.ID
             log.info("INPUT FILE %i / %s", n, input_file)
 
             file_id = self._make_file_id(input_file, self.input_file_grp, n)