diff --git a/ocrd_calamari/cli.py b/ocrd_calamari/cli.py
new file mode 100644
index 0000000..7a28dad
--- /dev/null
+++ b/ocrd_calamari/cli.py
@@ -0,0 +1,11 @@
+import click
+
+from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
+from ocrd_calamari.ocr import CalamariOcr
+
+
+@click.command()
+@ocrd_cli_options
+def ocrd_calamari_ocr(*args, **kwargs):
+    """Run the CalamariOcr processor through the OCR-D command-line wrapper."""
+    return ocrd_cli_wrap_processor(CalamariOcr, *args, **kwargs)
diff --git a/ocrd_calamari/config.py b/ocrd_calamari/config.py
new file mode 100644
index 0000000..01e0b23
--- /dev/null
+++ b/ocrd_calamari/config.py
@@ -0,0 +1,5 @@
+import json
+from pkg_resources import resource_string
+
+# Tool description bundled with the package, parsed once at import time.
+OCRD_TOOL = json.loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8'))
diff --git a/ocrd_calamari/ocr.py b/ocrd_calamari/ocr.py
index 2dd6038..6a793e0 100644
--- a/ocrd_calamari/ocr.py
+++ b/ocrd_calamari/ocr.py
@@ -1,18 +1,53 @@
 from __future__ import absolute_import
 
-from calamari_ocr.scripts.predict import run
-log = getLogger('processor.KrakenOcr')
+from glob import glob
 
-class KrakenOcr(Processor):
+import numpy as np
+from calamari_ocr.ocr import MultiPredictor
+from calamari_ocr.ocr.voting import voter_from_proto
+from calamari_ocr.proto import VoterParams
+from ocrd import Processor
+from ocrd.logging import getLogger
+from ocrd.model import ocrd_page
+from ocrd.utils import polygon_from_points
+
+from ocrd_calamari.config import OCRD_TOOL
+
+log = getLogger('processor.CalamariOcr')
+
+# TODO: Should this be "recognize", not "ocr", akin to ocrd_tesserocr?
+
+
+class CalamariOcr(Processor):
+    """OCR-D processor that recognizes text lines with Calamari."""
 
     def __init__(self, *args, **kwargs):
         kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd-calamari-ocr']
-        super(KrakenOcr, self).__init__(*args, **kwargs)
+        super(CalamariOcr, self).__init__(*args, **kwargs)
+
+
+    def _init_calamari(self):
+        """Load the Calamari checkpoints and set up the result voter."""
+        # The checkpoint glob is read from the processor parameters instead
+        # of a developer-local path; sorting keeps the fold order stable.
+        checkpoint_glob = self.parameter.get('checkpoint', '')
+        checkpoints = sorted(glob(checkpoint_glob))
+        if not checkpoints:
+            raise ValueError("No Calamari checkpoints match %r" % checkpoint_glob)
+        self.predictor = MultiPredictor(checkpoints=checkpoints)
+
+        voter_params = VoterParams()
+        voter_params.type = VoterParams.Type.Value('confidence_voter_default_ctc'.upper())
+        self.voter = voter_from_proto(voter_params)
+
 
     def process(self):
         """
-        Performs the binarization.
+        Performs the recognition.
         """
+
+        self._init_calamari()
+
         for (n, input_file) in enumerate(self.input_files):
             log.info("INPUT FILE %i / %s", n, input_file)
             pcgts = ocrd_page.from_file(self.workspace.download_file(input_file))
@@ -20,20 +55,24 @@ class KrakenOcr(Processor):
             log.info("pcgts %s", pcgts)
             for region in pcgts.get_Page().get_TextRegion():
                 textlines = region.get_TextLine()
-                log.info("About to binarize %i lines of region '%s'", len(textlines), region.id)
+                log.info("About to recognize %i lines of region '%s'", len(textlines), region.id)
                 for (line_no, line) in enumerate(textlines):
-                    log.debug("Binarizing line '%s' in region '%s'", line_no, region.id)
-                    image = self.workspace.resolve_image_as_pil(image_url, polygon_from_points(line.get_Coords().points))
-                    print(dir(kraken.binarization))
-                    bin_image = kraken.binarization.nlbin(image)
-                    bin_image_bytes = io.BytesIO()
-                    bin_image.save(bin_image_bytes, format='PNG')
-                    ID = concat_padded(self.output_file_grp, n)
-                    self.add_output_file(
-                        ID=ID,
-                        file_grp=self.output_file_grp,
-                        basename="%s.bin.png" % ID,
-                        mimetype='image/png',
-                        content=bin_image_bytes.getvalue()
-                    )
+                    log.debug("Recognizing line '%s' in region '%s'", line_no, region.id)
+                    image = self.workspace.resolve_image_as_pil(image_url,
+                                                                polygon_from_points(line.get_Coords().points))
+                    image_np = np.array(image, dtype=np.uint8)  # XXX better way?
+
+                    # A single line image is passed in, so only the first result set is used.
+                    raw_results = list(self.predictor.predict_raw([image_np], progress_bar=False))[0]
+
+                    for i, p in enumerate(raw_results):
+                        p.prediction.id = "fold_{}".format(i)
+
+                    # The voter combines the per-fold results into one prediction.
+                    prediction = self.voter.vote_prediction_result(raw_results)
+                    prediction.id = "voted"
+                    log.info("Voted text for line '%s': %s", line_no, prediction.sentence)
+                    log.debug("Average character probability: %s", prediction.avg_char_probability)
+                    for raw_result in raw_results:
+                        log.debug("Fold text: %s", raw_result.sentence)
 
diff --git a/ocrd_calamari/ocrd-tool.json b/ocrd_calamari/ocrd-tool.json
index 6c0b0ad..a2a8c4f 100644
--- a/ocrd_calamari/ocrd-tool.json
+++ b/ocrd_calamari/ocrd-tool.json
@@ -10,13 +10,19 @@
       "steps": [
         "recognition/text-recognition"
       ],
-      "description": "Recognize lines with kraken",
+      "description": "Recognize lines with Calamari",
+      "input_file_grp": [
+        "OCR-D-SEG-LINE"
+      ],
+      "output_file_grp": [
+        "OCR-D-OCR-CALAMARI"
+      ],
       "parameters": {
         "checkpoint": {"type": "string", "format": "file", "cacheable": true},
         "processes": {"type": "number", "default": 1},
         "batch_size": {"type": "number", "default": 1},
-        "voter": {"type": "string", "default": "confidence_voter_default_ctc"},
-        "extended_prediction_data_format": {"type": "string", "default": "json"},
+        "XXX voter": {"type": "string", "default": "confidence_voter_default_ctc"},
+        "XXXX extended_prediction_data_format": {"type": "string", "default": "json"},
         "XXX output_dir": "TODO",
         "XXX extended_prediction_data": "TODO"
       }
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..1c3fd3e
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+calamari-ocr
+tensorflow-gpu
+click
+ocrd
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..8eb4533
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+"""
+Installs one executable:
+
+    - ocrd-calamari-ocr
+"""
+import codecs
+
+from setuptools import setup, find_packages
+
+with codecs.open('README.md', encoding='utf-8') as f:
+    long_description = f.read()
+
+with open('requirements.txt') as f:
+    # Skip blank lines so a trailing newline does not yield an empty requirement.
+    install_requires = [line.strip() for line in f if line.strip()]
+
+setup(
+    name='ocrd_calamari',
+    version='0.0.1',
+    description='Calamari bindings',
+    long_description=long_description,
+    author='Konstantin Baierer, Mike Gerber',
+    author_email='unixprog@gmail.com, mike.gerber@sbb.spk-berlin.de',
+    url='https://github.com/OCR-D/ocrd_calamari',  # XXX
+    license='Apache License 2.0',
+    packages=find_packages(exclude=('tests', 'docs')),
+    install_requires=install_requires,
+    package_data={
+        '': ['*.json', '*.yml', '*.yaml'],
+    },
+    entry_points={
+        'console_scripts': [
+            'ocrd-calamari-ocr=ocrd_calamari.cli:ocrd_calamari_ocr',
+        ]
+    },
+)