mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-08 19:59:56 +02:00
rename ocrd-sbb-binarize to ocrd-eynollah-binarize
This commit is contained in:
parent
3f1e140da1
commit
881f6f37c1
5 changed files with 13 additions and 13 deletions
|
@ -11,7 +11,7 @@ description = "Document Layout Analysis"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
license.file = "LICENSE"
|
license.file = "LICENSE"
|
||||||
requires-python = ">=3.8"
|
requires-python = ">=3.8"
|
||||||
keywords = ["document layout analysis", "image segmentation"]
|
keywords = ["document layout analysis", "image segmentation", "image binarization"]
|
||||||
|
|
||||||
dynamic = ["dependencies", "version"]
|
dynamic = ["dependencies", "version"]
|
||||||
|
|
||||||
|
@ -32,7 +32,7 @@ plotting = ["matplotlib"]
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
eynollah = "eynollah.cli:main"
|
eynollah = "eynollah.cli:main"
|
||||||
ocrd-eynollah-segment = "eynollah.ocrd_cli:main"
|
ocrd-eynollah-segment = "eynollah.ocrd_cli:main"
|
||||||
ocrd-sbb-binarize = "eynollah.ocrd_cli_binarization:cli"
|
ocrd-eynollah-binarize = "eynollah.ocrd_cli_binarization:cli"
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
Homepage = "https://github.com/qurator-spk/eynollah"
|
Homepage = "https://github.com/qurator-spk/eynollah"
|
||||||
|
|
|
@ -24,11 +24,11 @@ def resize_image(img_in, input_height, input_width):
|
||||||
return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
|
return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
|
||||||
|
|
||||||
|
|
||||||
class SbbBinarizer:
|
class Binarizer:
|
||||||
|
|
||||||
def __init__(self, model_dir, logger=None):
|
def __init__(self, model_dir, logger=None):
|
||||||
self.model_dir = model_dir
|
self.model_dir = model_dir
|
||||||
self.log = logger if logger else logging.getLogger('SbbBinarizer')
|
self.log = logger if logger else logging.getLogger('Binarizer')
|
||||||
|
|
||||||
self.start_new_session()
|
self.start_new_session()
|
||||||
|
|
|
@ -2,7 +2,7 @@ import sys
|
||||||
import click
|
import click
|
||||||
from ocrd_utils import initLogging, setOverrideLogLevel
|
from ocrd_utils import initLogging, setOverrideLogLevel
|
||||||
from eynollah.eynollah import Eynollah, EynollahOcr
|
from eynollah.eynollah import Eynollah, EynollahOcr
|
||||||
from eynollah.sbb_binarize import SbbBinarizer
|
from eynollah.binarize import Binarizer
|
||||||
|
|
||||||
|
|
||||||
@click.group()
|
@click.group()
|
||||||
|
@ -75,7 +75,7 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out)
|
||||||
elif dir_out and not dir_in:
|
elif dir_out and not dir_in:
|
||||||
print("Error: You used -do to write out binarized images but have not set -di")
|
print("Error: You used -do to write out binarized images but have not set -di")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image, dir_in=dir_in,
|
Binarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image, dir_in=dir_in,
|
||||||
dir_out=dir_out)
|
dir_out=dir_out)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,8 +2,8 @@
|
||||||
"version": "0.1.0",
|
"version": "0.1.0",
|
||||||
"git_url": "https://github.com/qurator-spk/sbb_binarization",
|
"git_url": "https://github.com/qurator-spk/sbb_binarization",
|
||||||
"tools": {
|
"tools": {
|
||||||
"ocrd-sbb-binarize": {
|
"ocrd-eynollah-binarize": {
|
||||||
"executable": "ocrd-sbb-binarize",
|
"executable": "ocrd-eynollah-binarize",
|
||||||
"description": "Pixelwise binarization with selectional auto-encoders in Keras",
|
"description": "Pixelwise binarization with selectional auto-encoders in Keras",
|
||||||
"categories": ["Image preprocessing"],
|
"categories": ["Image preprocessing"],
|
||||||
"steps": ["preprocessing/optimization/binarization"],
|
"steps": ["preprocessing/optimization/binarization"],
|
||||||
|
|
|
@ -20,10 +20,10 @@ from ocrd_modelfactory import page_from_file
|
||||||
from ocrd_models.ocrd_page import AlternativeImageType, to_xml
|
from ocrd_models.ocrd_page import AlternativeImageType, to_xml
|
||||||
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
|
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
|
||||||
|
|
||||||
from .sbb_binarize import SbbBinarizer
|
from .binarize import Binarizer
|
||||||
|
|
||||||
OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool-binarization.json').decode('utf8'))
|
OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool-binarization.json').decode('utf8'))
|
||||||
TOOL = 'ocrd-sbb-binarize'
|
TOOL = 'ocrd-eynollah-binarize'
|
||||||
|
|
||||||
|
|
||||||
def cv2pil(img):
|
def cv2pil(img):
|
||||||
|
@ -68,7 +68,7 @@ class SbbBinarizeProcessor(Processor):
|
||||||
raise FileNotFoundError("Does not exist or is not a directory: %s" % model_path)
|
raise FileNotFoundError("Does not exist or is not a directory: %s" % model_path)
|
||||||
# resolve relative path via OCR-D ResourceManager
|
# resolve relative path via OCR-D ResourceManager
|
||||||
model_path = self.resolve_resource(str(model_path))
|
model_path = self.resolve_resource(str(model_path))
|
||||||
self.binarizer = SbbBinarizer(model_dir=model_path, logger=LOG)
|
self.binarizer = Binarizer(model_dir=model_path, logger=LOG)
|
||||||
|
|
||||||
def process(self):
|
def process(self):
|
||||||
"""
|
"""
|
||||||
|
@ -125,7 +125,7 @@ class SbbBinarizeProcessor(Processor):
|
||||||
for region in regions:
|
for region in regions:
|
||||||
region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh,
|
region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh,
|
||||||
feature_filter='binarized')
|
feature_filter='binarized')
|
||||||
region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), use_patches=True))
|
region_image_bin = cv2pil(self.binarizer.run(image=pil2cv(region_image), use_patches=True))
|
||||||
region_image_bin_path = self.workspace.save_image_file(
|
region_image_bin_path = self.workspace.save_image_file(
|
||||||
region_image_bin,
|
region_image_bin,
|
||||||
"%s_%s.IMG-BIN" % (file_id, region.id),
|
"%s_%s.IMG-BIN" % (file_id, region.id),
|
||||||
|
@ -142,7 +142,7 @@ class SbbBinarizeProcessor(Processor):
|
||||||
for region_id, line in region_line_tuples:
|
for region_id, line in region_line_tuples:
|
||||||
line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh,
|
line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh,
|
||||||
feature_filter='binarized')
|
feature_filter='binarized')
|
||||||
line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), use_patches=True))
|
line_image_bin = cv2pil(self.binarizer.run(image=pil2cv(line_image), use_patches=True))
|
||||||
line_image_bin_path = self.workspace.save_image_file(
|
line_image_bin_path = self.workspace.save_image_file(
|
||||||
line_image_bin,
|
line_image_bin,
|
||||||
"%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id),
|
"%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue