From b9e38487bd0aa3aa3e36f5b017946c15978a9b1c Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 8 Aug 2019 10:49:35 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20Extract=20a=20method=20to=20reso?= =?UTF-8?q?lve=20an=20image=20as=20a=20Numpy=20array?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ocrd_calamari/ocr.py | 9 ++++----- requirements.txt | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ocrd_calamari/ocr.py b/ocrd_calamari/ocr.py index 6a793e0..24cea7a 100644 --- a/ocrd_calamari/ocr.py +++ b/ocrd_calamari/ocr.py @@ -24,7 +24,6 @@ class CalamariOcr(Processor): kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd-calamari-ocr'] super(CalamariOcr, self).__init__(*args, **kwargs) - def _init_calamari(self): checkpoints = glob('/home/mike/devel/experiments/train-calamari-gt4histocr/models/*.ckpt.json') # XXX self.predictor = MultiPredictor(checkpoints=checkpoints) @@ -33,6 +32,8 @@ class CalamariOcr(Processor): voter_params.type = VoterParams.Type.Value('confidence_voter_default_ctc'.upper()) self.voter = voter_from_proto(voter_params) + def resolve_image_as_np(self, image_url, coords): + return np.array(self.workspace.resolve_image_as_pil(image_url, coords), dtype=np.uint8) def process(self): """ @@ -51,12 +52,10 @@ class CalamariOcr(Processor): log.info("About to recognize %i lines of region '%s'", len(textlines), region.id) for (line_no, line) in enumerate(textlines): log.debug("Recognizing line '%s' in region '%s'", line_no, region.id) - image = self.workspace.resolve_image_as_pil(image_url, - polygon_from_points(line.get_Coords().points)) - image_np = np.array(image, dtype=np.uint8) # XXX better way? - raw_results = list(self.predictor.predict_raw([image_np], progress_bar=False))[0] + image = self.resolve_image_as_np(image_url, polygon_from_points(line.get_Coords().points)) + raw_results = list(self.predictor.predict_raw([image], progress_bar=False))[0] for i, p in enumerate(raw_results): p.prediction.id = "fold_{}".format(i) diff --git a/requirements.txt b/requirements.txt index 1c3fd3e..552c477 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +numpy calamari-ocr tensorflow-gpu click