🚧 Process lines

fix/readme-no-checkpoint
Gerber, Mike 5 years ago
parent 2ebf3c0e00
commit dbe43e2316

@ -0,0 +1,10 @@
import click
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
from ocrd_calamari.ocr import CalamariOcr
# Click command that forwards whatever arguments it receives (the options are
# declared by the ocrd_cli_options decorator) to ocrd_cli_wrap_processor,
# together with the CalamariOcr processor class.
# NOTE: documented with comments rather than a docstring on purpose, because
# click uses a command function's docstring as its --help text.
@click.command()
@ocrd_cli_options
def ocrd_calamari_ocr(*args, **kwargs):
    processor_class = CalamariOcr
    return ocrd_cli_wrap_processor(processor_class, *args, **kwargs)

@ -0,0 +1,4 @@
import json
from pkg_resources import resource_string
# Parsed contents of the 'ocrd-tool.json' resource, looked up via
# pkg_resources relative to this module, decoded as UTF-8 and loaded as JSON.
OCRD_TOOL = json.loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8'))

@ -1,18 +1,46 @@
from __future__ import absolute_import from __future__ import absolute_import
from calamari_ocr.scripts.predict import run
log = getLogger('processor.KrakenOcr') from glob import glob
class KrakenOcr(Processor): import numpy as np
from calamari_ocr.ocr import MultiPredictor
from calamari_ocr.ocr.voting import voter_from_proto
from calamari_ocr.proto import VoterParams
from ocrd import Processor
from ocrd.logging import getLogger
from ocrd.model import ocrd_page
from ocrd.utils import polygon_from_points
from ocrd_calamari.config import OCRD_TOOL
log = getLogger('processor.CalamariOcr')
# TODO: Should this be named "recognize" rather than "ocr", akin to ocrd_tesserocr?
class CalamariOcr(Processor):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd-calamari-ocr'] kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd-calamari-ocr']
super(KrakenOcr, self).__init__(*args, **kwargs) super(CalamariOcr, self).__init__(*args, **kwargs)
def _init_calamari(self):
    """
    Create the Calamari predictor and voter and store them on the instance
    as ``self.predictor`` and ``self.voter``.
    """
    # NOTE(review): the checkpoint location is a hard-coded, developer-local
    # absolute path (the original marks it XXX). It presumably should come
    # from a processor parameter instead -- confirm against ocrd-tool.json.
    model_glob = '/home/mike/devel/experiments/train-calamari-gt4histocr/models/*.ckpt.json'  # XXX
    self.predictor = MultiPredictor(checkpoints=glob(model_glob))

    # The voter type is looked up by the upper-cased form of its name.
    voter_type_name = 'confidence_voter_default_ctc'.upper()
    params = VoterParams()
    params.type = VoterParams.Type.Value(voter_type_name)
    self.voter = voter_from_proto(params)
def process(self): def process(self):
""" """
Performs the binarization. Performs the recognition.
""" """
self._init_calamari()
for (n, input_file) in enumerate(self.input_files): for (n, input_file) in enumerate(self.input_files):
log.info("INPUT FILE %i / %s", n, input_file) log.info("INPUT FILE %i / %s", n, input_file)
pcgts = ocrd_page.from_file(self.workspace.download_file(input_file)) pcgts = ocrd_page.from_file(self.workspace.download_file(input_file))
@ -20,20 +48,22 @@ class KrakenOcr(Processor):
log.info("pcgts %s", pcgts) log.info("pcgts %s", pcgts)
for region in pcgts.get_Page().get_TextRegion(): for region in pcgts.get_Page().get_TextRegion():
textlines = region.get_TextLine() textlines = region.get_TextLine()
log.info("About to binarize %i lines of region '%s'", len(textlines), region.id) log.info("About to recognize %i lines of region '%s'", len(textlines), region.id)
for (line_no, line) in enumerate(textlines): for (line_no, line) in enumerate(textlines):
log.debug("Binarizing line '%s' in region '%s'", line_no, region.id) log.debug("Recognizing line '%s' in region '%s'", line_no, region.id)
image = self.workspace.resolve_image_as_pil(image_url, polygon_from_points(line.get_Coords().points)) image = self.workspace.resolve_image_as_pil(image_url,
print(dir(kraken.binarization)) polygon_from_points(line.get_Coords().points))
bin_image = kraken.binarization.nlbin(image) image_np = np.array(image, dtype=np.uint8) # XXX better way?
bin_image_bytes = io.BytesIO()
bin_image.save(bin_image_bytes, format='PNG') raw_results = list(self.predictor.predict_raw([image_np], progress_bar=False))[0]
ID = concat_padded(self.output_file_grp, n)
self.add_output_file( for i, p in enumerate(raw_results):
ID=ID, p.prediction.id = "fold_{}".format(i)
file_grp=self.output_file_grp,
basename="%s.bin.png" % ID, prediction = self.voter.vote_prediction_result(raw_results)
mimetype='image/png', prediction.id = "voted"
content=bin_image_bytes.getvalue()
)
print('***', prediction.sentence)
print(prediction.avg_char_probability)
for raw_result in raw_results:
print(raw_result.sentence)

@ -10,13 +10,19 @@
"steps": [ "steps": [
"recognition/text-recognition" "recognition/text-recognition"
], ],
"description": "Recognize lines with kraken", "description": "Recognize lines with Calamari",
"input_file_grp": [
"OCR-D-SEG-LINE"
],
"output_file_grp": [
"OCR-D-OCR-CALAMARI"
],
"parameters": { "parameters": {
"checkpoint": {"type": "string", "format": "file", "cacheable": true}, "XXX checkpoint": {"type": "string", "format": "file", "cacheable": true},
"processes": {"type": "number", "default": 1}, "processes": {"type": "number", "default": 1},
"batch_size": {"type": "number", "default": 1}, "batch_size": {"type": "number", "default": 1},
"voter": {"type": "string", "default": "confidence_voter_default_ctc"}, "XXX voter": {"type": "string", "default": "confidence_voter_default_ctc"},
"extended_prediction_data_format": {"type": "string", "default": "json"}, "XXXX extended_prediction_data_format": {"type": "string", "default": "json"},
"XXX output_dir": "TODO", "XXX output_dir": "TODO",
"XXX extended_prediction_data": "TODO" "XXX extended_prediction_data": "TODO"
} }

@ -0,0 +1,4 @@
calamari-ocr
tensorflow-gpu
click
ocrd

@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
"""
Installs one executable:
- ocrd-calamari-ocr
"""
import codecs
from setuptools import setup, find_packages
# Read the auxiliary files inside context managers so their handles are
# closed deterministically instead of being left open until garbage
# collection.
with codecs.open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

with open('requirements.txt') as requirements_file:
    # Skip blank lines (e.g. the empty entry a trailing newline would
    # otherwise produce) so install_requires only holds real requirements.
    install_requires = [
        line.strip() for line in requirements_file if line.strip()
    ]

# Package metadata; registers the 'ocrd-calamari-ocr' console script.
setup(
    name='ocrd_calamari',
    version='0.0.1',
    description='Calamari bindings',
    long_description=long_description,
    author='Konstantin Baierer, Mike Gerber',
    author_email='unixprog@gmail.com, mike.gerber@sbb.spk-berlin.de',
    url='https://github.com/OCR-D/ocrd_calamari',  # XXX
    license='Apache License 2.0',
    packages=find_packages(exclude=('tests', 'docs')),
    install_requires=install_requires,
    package_data={
        '': ['*.json', '*.yml', '*.yaml'],
    },
    entry_points={
        'console_scripts': [
            'ocrd-calamari-ocr=ocrd_calamari.cli:ocrd_calamari_ocr',
        ]
    },
)
Loading…
Cancel
Save