From 8b01d9e671d1527bdf52983ddd0799bcb1f4feda Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Wed, 5 Aug 2020 17:16:40 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20sbb=5Ftextline=5Fdetection:=20Se?= =?UTF-8?q?t=20pcGtsId?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newest OCR-D workspace validation requires that the pcGtsId of a PAGE-XML file matches its METS mets:file/ID. Fix this by setting it correctly. --- qurator/sbb_textline_detector/ocrd_cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/qurator/sbb_textline_detector/ocrd_cli.py b/qurator/sbb_textline_detector/ocrd_cli.py index a42027c..ea1b4e3 100644 --- a/qurator/sbb_textline_detector/ocrd_cli.py +++ b/qurator/sbb_textline_detector/ocrd_cli.py @@ -77,6 +77,7 @@ class OcrdSbbTextlineDetectorRecognize(Processor): # Create a new PAGE file from the input file pcgts = page_from_file(self.workspace.download_file(input_file)) + pcgts.set_pcGtsId(file_id) page = pcgts.get_Page() # Merge results → PAGE file