diff --git a/pyproject.toml b/pyproject.toml index 61d488a..f272aa6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ description = "Document Layout Analysis" readme = "README.md" license.file = "LICENSE" requires-python = ">=3.8" -keywords = ["document layout analysis", "image segmentation"] +keywords = ["document layout analysis", "image segmentation", "image binarization"] dynamic = ["dependencies", "version"] @@ -32,7 +32,7 @@ plotting = ["matplotlib"] [project.scripts] eynollah = "eynollah.cli:main" ocrd-eynollah-segment = "eynollah.ocrd_cli:main" -ocrd-sbb-binarize = "eynollah.ocrd_cli_binarization:cli" +ocrd-eynollah-binarize = "eynollah.ocrd_cli_binarization:cli" [project.urls] Homepage = "https://github.com/qurator-spk/eynollah" diff --git a/src/eynollah/sbb_binarize.py b/src/eynollah/binarize.py similarity index 99% rename from src/eynollah/sbb_binarize.py rename to src/eynollah/binarize.py index d503559..e422411 100644 --- a/src/eynollah/sbb_binarize.py +++ b/src/eynollah/binarize.py @@ -24,11 +24,11 @@ def resize_image(img_in, input_height, input_width): return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) -class SbbBinarizer: +class Binarizer: def __init__(self, model_dir, logger=None): self.model_dir = model_dir - self.log = logger if logger else logging.getLogger('SbbBinarizer') + self.log = logger if logger else logging.getLogger('Binarizer') self.start_new_session() diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 6dd9c22..7cae4fc 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -2,7 +2,7 @@ import sys import click from ocrd_utils import initLogging, setOverrideLogLevel from eynollah.eynollah import Eynollah, EynollahOcr -from eynollah.sbb_binarize import SbbBinarizer +from eynollah.binarize import Binarizer @click.group() @@ -75,7 +75,7 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) elif dir_out and not dir_in: print("Error: You used 
-do to write out binarized images but have not set -di") sys.exit(1) - SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image, dir_in=dir_in, + Binarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image, dir_in=dir_in, dir_out=dir_out) diff --git a/src/eynollah/ocrd-tool-binarization.json b/src/eynollah/ocrd-tool-binarization.json index 1711e89..c917096 100644 --- a/src/eynollah/ocrd-tool-binarization.json +++ b/src/eynollah/ocrd-tool-binarization.json @@ -2,8 +2,8 @@ "version": "0.1.0", "git_url": "https://github.com/qurator-spk/sbb_binarization", "tools": { - "ocrd-sbb-binarize": { - "executable": "ocrd-sbb-binarize", + "ocrd-eynollah-binarize": { + "executable": "ocrd-eynollah-binarize", "description": "Pixelwise binarization with selectional auto-encoders in Keras", "categories": ["Image preprocessing"], "steps": ["preprocessing/optimization/binarization"], diff --git a/src/eynollah/ocrd_cli_binarization.py b/src/eynollah/ocrd_cli_binarization.py index 2cec6d2..97a44fd 100644 --- a/src/eynollah/ocrd_cli_binarization.py +++ b/src/eynollah/ocrd_cli_binarization.py @@ -20,10 +20,10 @@ from ocrd_modelfactory import page_from_file from ocrd_models.ocrd_page import AlternativeImageType, to_xml from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor -from .sbb_binarize import SbbBinarizer +from .binarize import Binarizer OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool-binarization.json').decode('utf8')) -TOOL = 'ocrd-sbb-binarize' +TOOL = 'ocrd-eynollah-binarize' def cv2pil(img): @@ -68,7 +68,7 @@ class SbbBinarizeProcessor(Processor): raise FileNotFoundError("Does not exist or is not a directory: %s" % model_path) # resolve relative path via OCR-D ResourceManager model_path = self.resolve_resource(str(model_path)) - self.binarizer = SbbBinarizer(model_dir=model_path, logger=LOG) + self.binarizer = Binarizer(model_dir=model_path, logger=LOG) def process(self): """ @@ -125,7 +125,7 @@ 
class SbbBinarizeProcessor(Processor): for region in regions: region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized') - region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), use_patches=True)) + region_image_bin = cv2pil(self.binarizer.run(image=pil2cv(region_image), use_patches=True)) region_image_bin_path = self.workspace.save_image_file( region_image_bin, "%s_%s.IMG-BIN" % (file_id, region.id), @@ -142,7 +142,7 @@ class SbbBinarizeProcessor(Processor): for region_id, line in region_line_tuples: line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') - line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), use_patches=True)) + line_image_bin = cv2pil(self.binarizer.run(image=pil2cv(line_image), use_patches=True)) line_image_bin_path = self.workspace.save_image_file( line_image_bin, "%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id),