mirror of
https://github.com/mikegerber/ocrd_calamari.git
synced 2025-06-10 04:09:53 +02:00
🚧 Process lines
This commit is contained in:
parent
2ebf3c0e00
commit
dbe43e2316
6 changed files with 108 additions and 24 deletions
10
ocrd_calamari/cli.py
Normal file
10
ocrd_calamari/cli.py
Normal file
|
@ -0,0 +1,10 @@
|
|||
import click
|
||||
|
||||
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
|
||||
from ocrd_calamari.ocr import CalamariOcr
|
||||
|
||||
|
||||
# Command-line entry point: click parses the standard OCR-D options and
# hands them, untouched, to the OCR-D wrapper together with the processor
# class. (Kept as a comment on purpose: a docstring on a click command
# would be shown as its --help text.)
@click.command()
@ocrd_cli_options
def ocrd_calamari_ocr(*args, **kwargs):
    processor_class = CalamariOcr
    return ocrd_cli_wrap_processor(processor_class, *args, **kwargs)
|
4
ocrd_calamari/config.py
Normal file
4
ocrd_calamari/config.py
Normal file
|
@ -0,0 +1,4 @@
|
|||
import json
|
||||
from pkg_resources import resource_string
|
||||
|
||||
# Parsed contents of the packaged 'ocrd-tool.json' resource, read once at
# import time (the resource bytes are decoded as UTF-8 before JSON parsing).
OCRD_TOOL = json.loads(
    resource_string(__name__, 'ocrd-tool.json').decode('utf8')
)
|
|
@ -1,18 +1,46 @@
|
|||
from __future__ import absolute_import
|
||||
from calamari_ocr.scripts.predict import run
|
||||
|
||||
log = getLogger('processor.KrakenOcr')
|
||||
from glob import glob
|
||||
|
||||
class KrakenOcr(Processor):
|
||||
import numpy as np
|
||||
from calamari_ocr.ocr import MultiPredictor
|
||||
from calamari_ocr.ocr.voting import voter_from_proto
|
||||
from calamari_ocr.proto import VoterParams
|
||||
from ocrd import Processor
|
||||
from ocrd.logging import getLogger
|
||||
from ocrd.model import ocrd_page
|
||||
from ocrd.utils import polygon_from_points
|
||||
|
||||
from ocrd_calamari.config import OCRD_TOOL
|
||||
|
||||
log = getLogger('processor.CalamariOcr')
|
||||
|
||||
# TODO: Should this be "recognize" rather than "ocr", akin to ocrd_tesserocr?
|
||||
|
||||
|
||||
class CalamariOcr(Processor):
|
||||
|
||||
def __init__(self, *args, **kwargs):
    """Set up the processor with the ``ocrd-calamari-ocr`` tool description.

    The ``ocrd_tool`` keyword argument is always overwritten with the
    ``ocrd-calamari-ocr`` entry of ``OCRD_TOOL``; every other positional
    and keyword argument is forwarded unchanged to the parent initializer.
    """
    kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd-calamari-ocr']
    # Initialize exactly once, via this class. The leftover
    # ``super(KrakenOcr, self)`` call named a class this one does not
    # inherit from, which makes super() raise TypeError.
    super(CalamariOcr, self).__init__(*args, **kwargs)
|
||||
|
||||
|
||||
def _init_calamari(self, checkpoint_pattern=None):
    """Create the Calamari predictor and voter used for recognition.

    Sets ``self.predictor`` to a ``MultiPredictor`` over every checkpoint
    file matching *checkpoint_pattern*, and ``self.voter`` to a voter of
    type ``CONFIDENCE_VOTER_DEFAULT_CTC``.

    :param checkpoint_pattern: glob pattern selecting the ``*.ckpt.json``
        checkpoint files. When omitted, the previous hard-coded
        development path is used, so existing callers keep working.
    :raises IOError: if no file matches the pattern.
    """
    if checkpoint_pattern is None:
        # XXX Developer-local default; should eventually come from the
        # (currently disabled) "checkpoint" tool parameter.
        checkpoint_pattern = '/home/mike/devel/experiments/train-calamari-gt4histocr/models/*.ckpt.json'

    # glob() returns matches in arbitrary order; sort so that the order of
    # the models (and anything numbered after it) is reproducible.
    checkpoints = sorted(glob(checkpoint_pattern))
    if not checkpoints:
        raise IOError(
            "No Calamari checkpoints found matching '%s'" % checkpoint_pattern)
    self.predictor = MultiPredictor(checkpoints=checkpoints)

    voter_name = 'confidence_voter_default_ctc'
    voter_params = VoterParams()
    # The protobuf enum value names are upper-case.
    voter_params.type = VoterParams.Type.Value(voter_name.upper())
    self.voter = voter_from_proto(voter_params)
|
||||
|
||||
|
||||
def process(self):
|
||||
"""
|
||||
Performs the binarization.
|
||||
Performs the recognition.
|
||||
"""
|
||||
|
||||
self._init_calamari()
|
||||
|
||||
for (n, input_file) in enumerate(self.input_files):
|
||||
log.info("INPUT FILE %i / %s", n, input_file)
|
||||
pcgts = ocrd_page.from_file(self.workspace.download_file(input_file))
|
||||
|
@ -20,20 +48,22 @@ class KrakenOcr(Processor):
|
|||
log.info("pcgts %s", pcgts)
|
||||
for region in pcgts.get_Page().get_TextRegion():
|
||||
textlines = region.get_TextLine()
|
||||
log.info("About to binarize %i lines of region '%s'", len(textlines), region.id)
|
||||
log.info("About to recognize %i lines of region '%s'", len(textlines), region.id)
|
||||
for (line_no, line) in enumerate(textlines):
|
||||
log.debug("Binarizing line '%s' in region '%s'", line_no, region.id)
|
||||
image = self.workspace.resolve_image_as_pil(image_url, polygon_from_points(line.get_Coords().points))
|
||||
print(dir(kraken.binarization))
|
||||
bin_image = kraken.binarization.nlbin(image)
|
||||
bin_image_bytes = io.BytesIO()
|
||||
bin_image.save(bin_image_bytes, format='PNG')
|
||||
ID = concat_padded(self.output_file_grp, n)
|
||||
self.add_output_file(
|
||||
ID=ID,
|
||||
file_grp=self.output_file_grp,
|
||||
basename="%s.bin.png" % ID,
|
||||
mimetype='image/png',
|
||||
content=bin_image_bytes.getvalue()
|
||||
)
|
||||
log.debug("Recognizing line '%s' in region '%s'", line_no, region.id)
|
||||
image = self.workspace.resolve_image_as_pil(image_url,
|
||||
polygon_from_points(line.get_Coords().points))
|
||||
image_np = np.array(image, dtype=np.uint8) # XXX better way?
|
||||
|
||||
raw_results = list(self.predictor.predict_raw([image_np], progress_bar=False))[0]
|
||||
|
||||
for i, p in enumerate(raw_results):
|
||||
p.prediction.id = "fold_{}".format(i)
|
||||
|
||||
prediction = self.voter.vote_prediction_result(raw_results)
|
||||
prediction.id = "voted"
|
||||
|
||||
print('***', prediction.sentence)
|
||||
print(prediction.avg_char_probability)
|
||||
for raw_result in raw_results:
|
||||
print(raw_result.sentence)
|
||||
|
|
|
@ -10,13 +10,19 @@
|
|||
"steps": [
|
||||
"recognition/text-recognition"
|
||||
],
|
||||
"description": "Recognize lines with kraken",
|
||||
"description": "Recognize lines with Calamari",
|
||||
"input_file_grp": [
|
||||
"OCR-D-SEG-LINE"
|
||||
],
|
||||
"output_file_grp": [
|
||||
"OCR-D-OCR-CALAMARI"
|
||||
],
|
||||
"parameters": {
|
||||
"checkpoint": {"type": "string", "format": "file", "cacheable": true},
|
||||
"XXX checkpoint": {"type": "string", "format": "file", "cacheable": true},
|
||||
"processes": {"type": "number", "default": 1},
|
||||
"batch_size": {"type": "number", "default": 1},
|
||||
"voter": {"type": "string", "default": "confidence_voter_default_ctc"},
|
||||
"extended_prediction_data_format": {"type": "string", "default": "json"},
|
||||
"XXX voter": {"type": "string", "default": "confidence_voter_default_ctc"},
|
||||
"XXXX extended_prediction_data_format": {"type": "string", "default": "json"},
|
||||
"XXX output_dir": "TODO",
|
||||
"XXX extended_prediction_data": "TODO"
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue