From 027fcd7d75c621f9130c2b3b96e5d996360ac893 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Tue, 21 Jul 2020 20:10:36 +0200 Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=90=9B=20Fix=20test=20file=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test_recognize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_recognize.py b/test/test_recognize.py index 5db48cf..54faf87 100644 --- a/test/test_recognize.py +++ b/test/test_recognize.py @@ -12,7 +12,7 @@ from ocrd_calamari import CalamariRecognize from .base import assets -METS_KANT = assets.url_of('kant_aufklaerung_1784-page-block-line-word_glyph/data/mets.xml') +METS_KANT = assets.url_of('kant_aufklaerung_1784-page-region-line-word_glyph/data/mets.xml') WORKSPACE_DIR = '/tmp/test-ocrd-calamari' CHECKPOINT = os.path.join(os.getcwd(), 'gt4histocr-calamari/*.ckpt.json') From 9ea50e25d17acbc19426c15860c7b37b194f0b67 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Tue, 21 Jul 2020 18:16:52 +0200 Subject: [PATCH 2/4] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Update=20to=20Calamari?= =?UTF-8?q?=201.0.x?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0a426e0..53a18b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ numpy -tensorflow-gpu == 1.15.* -calamari-ocr == 0.3.5 +tensorflow-gpu == 2.2.* +calamari-ocr == 1.0.* setuptools >= 41.0.0 # tensorboard depends on this, but why do we get an error at runtime? click ocrd >= 2.2.1 From 7584d0135ce6f3988909244a1a6f49fbadf314ad Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Tue, 21 Jul 2020 18:52:47 +0200 Subject: [PATCH 3/4] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Update=20model=20downl?= =?UTF-8?q?oad=20for=20Calamari=201.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e65df22..61ca7f3 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ install: gt4histocr-calamari: mkdir gt4histocr-calamari cd gt4histocr-calamari && \ - wget https://qurator-data.de/calamari-models/GT4HistOCR/model.tar.xz && \ + wget https://qurator-data.de/calamari-models/GT4HistOCR/2019-12-11T11_10+0100/model.tar.xz && \ tar xfv model.tar.xz && \ rm model.tar.xz From 93190fae3b3d8b5b9a68b37f604c43c34979e5d4 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Wed, 22 Jul 2020 16:03:10 +0200 Subject: [PATCH 4/4] =?UTF-8?q?=E2=9A=A1=20Recognize=20more=20than=20one?= =?UTF-8?q?=20line=20at=20a=20time=20(Fixes=20gh#20)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ocrd_calamari/recognize.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index 8ae0a17..d040550 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -72,13 +72,16 @@ class CalamariRecognize(Processor): textlines = region.get_TextLine() log.info("About to recognize %i lines of region '%s'", len(textlines), region.id) - for (line_no, line) in enumerate(textlines): - log.debug("Recognizing line '%s' in region '%s'", line.id, region.id) + line_images_np = [] + for (line_no, line) in enumerate(textlines): line_image, line_coords = self.workspace.image_from_segment(line, region_image, region_xywh) line_image_np = np.array(line_image, dtype=np.uint8) + line_images_np.append(line_image_np) + raw_results_all = self.predictor.predict_raw(line_images_np, progress_bar=False) + + for line, raw_results in zip(textlines, raw_results_all): - raw_results = list(self.predictor.predict_raw([line_image_np], progress_bar=False))[0] for i, p in enumerate(raw_results): p.prediction.id = "fold_{}".format(i)