From 4179a2ea3af1ecaccf643a411277fcee2da86429 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Wed, 26 Mar 2025 01:27:02 +0100 Subject: [PATCH] integrate binarization in ocrd-tool.json --- src/eynollah/ocrd-tool-binarization.json | 47 ---------------------- src/eynollah/ocrd-tool.json | 51 +++++++++++++++++++++--- src/eynollah/ocrd_cli_binarization.py | 2 +- 3 files changed, 47 insertions(+), 53 deletions(-) delete mode 100644 src/eynollah/ocrd-tool-binarization.json diff --git a/src/eynollah/ocrd-tool-binarization.json b/src/eynollah/ocrd-tool-binarization.json deleted file mode 100644 index c917096..0000000 --- a/src/eynollah/ocrd-tool-binarization.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "version": "0.1.0", - "git_url": "https://github.com/qurator-spk/sbb_binarization", - "tools": { - "ocrd-eynollah-binarize": { - "executable": "ocrd-eynollah-binarize", - "description": "Pixelwise binarization with selectional auto-encoders in Keras", - "categories": ["Image preprocessing"], - "steps": ["preprocessing/optimization/binarization"], - "input_file_grp": [], - "output_file_grp": [], - "parameters": { - "operation_level": { - "type": "string", - "enum": ["page", "region"], - "default": "page", - "description": "PAGE XML hierarchy level to operate on" - }, - "model": { - "description": "Directory containing HDF5 or SavedModel/ProtoBuf models. Can be an absolute path or a path relative to the OCR-D resource location, the current working directory or the $SBB_BINARIZE_DATA environment variable (if set)", - "type": "string", - "format": "uri", - "content-type": "text/directory", - "required": true - } - }, - "resources": [ - { - "url": "https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2020_01_16.zip", - "name": "default", - "type": "archive", - "path_in_archive": "saved_model_2020_01_16", - "size": 563147331, - "description": "default models provided by github.com/qurator-spk (SavedModel format)" - }, - { - "url": "https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip", - "name": "default-2021-03-09", - "type": "archive", - "path_in_archive": ".", - "size": 133230419, - "description": "updated default models provided by github.com/qurator-spk (SavedModel format)" - } - ] - } - } -} diff --git a/src/eynollah/ocrd-tool.json b/src/eynollah/ocrd-tool.json index 9eb8932..b8f5a3d 100644 --- a/src/eynollah/ocrd-tool.json +++ b/src/eynollah/ocrd-tool.json @@ -39,11 +39,11 @@ "default": true, "description": "Light version need textline light" }, - "tables": { - "type": "boolean", - "default": false, - "description": "Try to detect table regions" - }, + "tables": { + "type": "boolean", + "default": false, + "description": "Try to detect table regions" + }, "curved_line": { "type": "boolean", "default": false, @@ -70,6 +70,47 @@ "path_in_archive": "models_eynollah" } ] + }, + "ocrd-eynollah-binarize": { + "executable": "ocrd-eynollah-binarize", + "description": "Pixelwise binarization with selectional auto-encoders in Keras", + "categories": ["Image preprocessing"], + "steps": ["preprocessing/optimization/binarization"], + "input_file_grp": [], + "output_file_grp": [], + "parameters": { + "operation_level": { + "type": "string", + "enum": ["page", "region"], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + }, + "model": { + "description": "Directory containing HDF5 or SavedModel/ProtoBuf models. Can be an absolute path or a path relative to the OCR-D resource location, the current working directory or the $SBB_BINARIZE_DATA environment variable (if set)", + "type": "string", + "format": "uri", + "content-type": "text/directory", + "required": true + } + }, + "resources": [ + { + "url": "https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2020_01_16.zip", + "name": "default", + "type": "archive", + "path_in_archive": "saved_model_2020_01_16", + "size": 563147331, + "description": "default models provided by github.com/qurator-spk (SavedModel format)" + }, + { + "url": "https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip", + "name": "default-2021-03-09", + "type": "archive", + "path_in_archive": ".", + "size": 133230419, + "description": "updated default models provided by github.com/qurator-spk (SavedModel format)" + } + ] } } } diff --git a/src/eynollah/ocrd_cli_binarization.py b/src/eynollah/ocrd_cli_binarization.py index 97a44fd..5cede04 100644 --- a/src/eynollah/ocrd_cli_binarization.py +++ b/src/eynollah/ocrd_cli_binarization.py @@ -22,7 +22,7 @@ from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor from .binarize import Binarizer -OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool-binarization.json').decode('utf8')) +OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8')) TOOL = 'ocrd-eynollah-binarize'