From 00e43b1d1f6675de546c52ffe3a386c3fab7eb89 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 22 Dec 2020 19:11:44 +0100 Subject: [PATCH 1/2] use Processor.resolve_resource to handle on-demand download of models via registry --- ocrd_calamari/recognize.py | 3 ++- requirements.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index 6269e55..d896473 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -41,7 +41,8 @@ class CalamariRecognize(Processor): os.environ['TF_CPP_MIN_LOG_LEVEL'] = TF_CPP_MIN_LOG_LEVEL if self.parameter.get('checkpoint_dir', None): - self.parameter['checkpoint'] = '%s/*.ckpt.json' % self.parameter['checkpoint_dir'] + resolved = self.resolve_resource(self.parameter['checkpoint_dir']) + self.parameter['checkpoint'] = '%s/*.ckpt.json' % resolved checkpoints = glob(self.parameter['checkpoint']) self.predictor = MultiPredictor(checkpoints=checkpoints) diff --git a/requirements.txt b/requirements.txt index 20b2ff2..cbfb800 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ tensorflow >= 2.3.0rc2 calamari-ocr == 1.0.* setuptools >= 41.0.0 # tensorboard depends on this, but why do we get an error at runtime? click -ocrd >= 2.13.0 +ocrd >= 2.22.0 From 03f5e44e624a09577f4bea3defbb27059c971a9d Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 27 Jan 2021 13:59:45 +0100 Subject: [PATCH 2/2] define default for checkpoint_dir, but allow checkpoint still --- ocrd_calamari/ocrd-tool.json | 2 +- ocrd_calamari/recognize.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ocrd_calamari/ocrd-tool.json b/ocrd_calamari/ocrd-tool.json index 691eeba..467cdec 100644 --- a/ocrd_calamari/ocrd-tool.json +++ b/ocrd_calamari/ocrd-tool.json @@ -20,7 +20,7 @@ "parameters": { "checkpoint_dir": { "description": "The directory containing calamari model files (*.ckpt.json). 
Uses all checkpoints in that directory", - "type": "string", "format": "file", "cacheable": true + "type": "string", "format": "file", "cacheable": true, "default": "qurator-gt4histocr-1.0" }, "checkpoint": { "description": "The calamari model files (*.ckpt.json)", diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index d896473..0fe03c7 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -40,7 +40,7 @@ class CalamariRecognize(Processor): def _init_calamari(self): os.environ['TF_CPP_MIN_LOG_LEVEL'] = TF_CPP_MIN_LOG_LEVEL - if self.parameter.get('checkpoint_dir', None): + if not self.parameter.get('checkpoint', None) and self.parameter.get('checkpoint_dir', None): resolved = self.resolve_resource(self.parameter['checkpoint_dir']) self.parameter['checkpoint'] = '%s/*.ckpt.json' % resolved checkpoints = glob(self.parameter['checkpoint'])