From 4338259ca1be4fbca6c7ffe1a921939c257c8e68 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Mon, 31 Mar 2025 01:17:14 +0200 Subject: [PATCH] OCR-D: ensure page image gets replaced in result as well if not the original file --- src/eynollah/processor.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/eynollah/processor.py b/src/eynollah/processor.py index f4db854..812ba25 100644 --- a/src/eynollah/processor.py +++ b/src/eynollah/processor.py @@ -41,18 +41,20 @@ class EynollahProcessor(Processor): assert input_pcgts[0] assert self.parameter pcgts = input_pcgts[0] + result = OcrdPageResult(pcgts) page = pcgts.get_Page() - # if not('://' in page.imageFilename): - # image_filename = next(self.workspace.mets.find_files(local_filename=page.imageFilename)).local_filename - # else: - # # could be a URL with file:// or truly remote - # image_filename = self.workspace.download_file(next(self.workspace.mets.find_files(url=page.imageFilename))).local_filename page_image, _, _ = self.workspace.image_from_page( page, page_id, # avoid any features that would change the coordinate system: cropped,deskewed # (the PAGE builder merely adds regions, so afterwards we would not know which to transform) # also avoid binarization as models usually fare better on grayscale/RGB feature_filter='cropped,deskewed,binarized') + if hasattr(page_image, 'filename'): + image_filename = page_image.filename + else: + image_filename = "dummy" # will be replaced by ocrd.Processor.process_page_file + result.images.append(OcrdPageResultImage(page_image, '.IMG', page)) # mark as new original + # FIXME: mask out already existing regions (incremental segmentation) eynollah = Eynollah( self.resolve_resource(self.parameter['models']), logger=self.logger, @@ -68,7 +70,7 @@ class EynollahProcessor(Processor): tables=self.parameter['tables'], override_dpi=self.parameter['dpi'], pcgts=pcgts, - image_filename=page.imageFilename, + image_filename=image_filename, image_pil=page_image ) if self.models is not None: @@ -76,4 +78,4 @@ class EynollahProcessor(Processor): eynollah.models = self.models eynollah.run() self.models = eynollah.models - return OcrdPageResult(pcgts) + return result