diff --git a/.gitignore b/.gitignore
index 894a44c..a67a4c3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,3 +102,5 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+/calamari
+/calamari_models
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..2f88071
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,12 @@
+GIT_CLONE = git clone --depth 1
+calamari:
+	$(GIT_CLONE) https://github.com/chwick/calamari
+
+calamari_models:
+	$(GIT_CLONE) https://github.com/chwick/calamari_models
+
+# Steps are chained with && so that a failing step aborts the whole recipe.
+calamari/build: calamari calamari_models
+	cd calamari &&\
+		pip install -r requirements.txt &&\
+		python setup.py install
diff --git a/ocrd-tool.json b/ocrd-tool.json
new file mode 120000
index 0000000..4a7b986
--- /dev/null
+++ b/ocrd-tool.json
@@ -0,0 +1 @@
+ocrd_calamari/ocrd-tool.json
\ No newline at end of file
diff --git a/ocrd_calamari/ocr.py b/ocrd_calamari/ocr.py
new file mode 100644
index 0000000..2dd6038
--- /dev/null
+++ b/ocrd_calamari/ocr.py
@@ -0,0 +1,52 @@
+from __future__ import absolute_import
+import io
+
+from calamari_ocr.scripts.predict import run
+
+# NOTE(review): getLogger, Processor, OCRD_TOOL, ocrd_page,
+# polygon_from_points, concat_padded and kraken are used below but are not
+# imported anywhere in this module; importing it raises NameError until the
+# corresponding imports are added.
+log = getLogger('processor.KrakenOcr')
+
+class KrakenOcr(Processor):
+    """Processor that binarizes every text line of each input file."""
+
+    def __init__(self, *args, **kwargs):
+        # Always run with the tool description registered for this executable.
+        kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd-calamari-ocr']
+        super(KrakenOcr, self).__init__(*args, **kwargs)
+
+    def process(self):
+        """
+        Performs the binarization.
+
+        Crops every text line of every text region out of the page image,
+        binarizes it with kraken's nlbin and adds the result to the output
+        file group as a PNG file.
+        """
+        for (n, input_file) in enumerate(self.input_files):
+            log.info("INPUT FILE %i / %s", n, input_file)
+            pcgts = ocrd_page.from_file(self.workspace.download_file(input_file))
+            image_url = pcgts.get_Page().imageFilename
+            log.info("pcgts %s", pcgts)
+            for region in pcgts.get_Page().get_TextRegion():
+                textlines = region.get_TextLine()
+                log.info("About to binarize %i lines of region '%s'", len(textlines), region.id)
+                for (line_no, line) in enumerate(textlines):
+                    log.debug("Binarizing line '%s' in region '%s'", line_no, region.id)
+                    image = self.workspace.resolve_image_as_pil(image_url, polygon_from_points(line.get_Coords().points))
+                    bin_image = kraken.binarization.nlbin(image)
+                    bin_image_bytes = io.BytesIO()
+                    bin_image.save(bin_image_bytes, format='PNG')
+                    # Region id and line number keep the ID unique per line;
+                    # the page index alone would repeat for every line.
+                    ID = concat_padded(self.output_file_grp, n, region.id, line_no)
+                    self.add_output_file(
+                        ID=ID,
+                        file_grp=self.output_file_grp,
+                        basename="%s.bin.png" % ID,
+                        mimetype='image/png',
+                        content=bin_image_bytes.getvalue()
+                    )
+
diff --git a/ocrd_calamari/ocrd-tool.json b/ocrd_calamari/ocrd-tool.json
new file mode 100644
index 0000000..6c0b0ad
--- /dev/null
+++ b/ocrd_calamari/ocrd-tool.json
@@ -0,0 +1,25 @@
+{
+  "git_url": "https://github.com/OCR-D/ocrd_calamari",
+  "version": "0.0.1",
+  "tools": {
+    "ocrd-calamari-ocr": {
+      "executable": "ocrd-calamari-ocr",
+      "categories": [
+        "Text recognition and optimization"
+      ],
+      "steps": [
+        "recognition/text-recognition"
+      ],
+      "description": "Recognize lines with Calamari",
+      "parameters": {
+        "checkpoint": {"type": "string", "format": "file", "cacheable": true},
+        "processes": {"type": "number", "default": 1},
+        "batch_size": {"type": "number", "default": 1},
+        "voter": {"type": "string", "default": "confidence_voter_default_ctc"},
+        "extended_prediction_data_format": {"type": "string", "default": "json"},
+        "XXX output_dir": "TODO",
+        "XXX extended_prediction_data": "TODO"
+      }
+    }
+  }
+}