mirror of
https://github.com/qurator-spk/sbb_binarization.git
synced 2025-06-07 19:35:04 +02:00
Merge pull request #5 from OCR-D/ocrd-cli
Improve tooling and add OCR-D CLI
This commit is contained in:
commit
3e60a62469
13 changed files with 406 additions and 154 deletions
47
.circleci/config.yml
Normal file
47
.circleci/config.yml
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
version: 2
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
|
||||||
|
build-python36:
|
||||||
|
docker:
|
||||||
|
- image: python:3.6
|
||||||
|
steps:
|
||||||
|
- checkout
|
||||||
|
- restore_cache:
|
||||||
|
keys:
|
||||||
|
- model-cache
|
||||||
|
- run: make model
|
||||||
|
- save_cache:
|
||||||
|
key: model-cache
|
||||||
|
paths:
|
||||||
|
models.tar.gz
|
||||||
|
models
|
||||||
|
- run: make install
|
||||||
|
- run: git submodule update --init
|
||||||
|
- run: make test
|
||||||
|
|
||||||
|
build-python37:
|
||||||
|
docker:
|
||||||
|
- image: python:3.7
|
||||||
|
steps:
|
||||||
|
- checkout
|
||||||
|
- restore_cache:
|
||||||
|
keys:
|
||||||
|
- model-cache
|
||||||
|
- run: make model
|
||||||
|
- save_cache:
|
||||||
|
key: model-cache
|
||||||
|
paths:
|
||||||
|
models.tar.gz
|
||||||
|
models
|
||||||
|
- run: make install
|
||||||
|
- run: git submodule update --init
|
||||||
|
- run: make test
|
||||||
|
|
||||||
|
workflows:
|
||||||
|
version: 2
|
||||||
|
build:
|
||||||
|
jobs:
|
||||||
|
- build-python36
|
||||||
|
- build-python37
|
||||||
|
#- build-python38 # no tensorflow for python 3.8
|
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
*.egg-info
|
||||||
|
__pycache__
|
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
[submodule "repo/assets"]
|
||||||
|
path = repo/assets
|
||||||
|
url = https://github.com/OCR-D/assets
|
36
Makefile
Normal file
36
Makefile
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
# Directory to store models
|
||||||
|
MODEL_DIR = $(PWD)/models
|
||||||
|
|
||||||
|
# BEGIN-EVAL makefile-parser --make-help Makefile
|
||||||
|
|
||||||
|
help:
|
||||||
|
@echo ""
|
||||||
|
@echo " Targets"
|
||||||
|
@echo ""
|
||||||
|
@echo " install Install with pip"
|
||||||
|
@echo " model Downloads the pre-trained models from qurator-data.de"
|
||||||
|
@echo " test Run tests"
|
||||||
|
@echo ""
|
||||||
|
@echo " Variables"
|
||||||
|
@echo ""
|
||||||
|
@echo " MODEL_DIR Directory to store models"
|
||||||
|
|
||||||
|
# END-EVAL
|
||||||
|
|
||||||
|
# Install with pip
|
||||||
|
install:
|
||||||
|
pip install .
|
||||||
|
|
||||||
|
# Downloads the pre-trained models from qurator-data.de
|
||||||
|
model: $(MODEL_DIR)/model1_bin.h5
|
||||||
|
|
||||||
|
$(MODEL_DIR)/model1_bin.h5: models.tar.gz
|
||||||
|
tar xf models.tar.gz
|
||||||
|
|
||||||
|
models.tar.gz:
|
||||||
|
wget 'https://qurator-data.de/sbb_binarization/models.tar.gz'
|
||||||
|
|
||||||
|
# Run tests
|
||||||
|
# Run the OCR-D processor on the bundled sample workspaces (page level, then
# region level). `&&` keeps the processor from running in the wrong directory
# when `cd` fails; the parameter name matches `operation_level` as declared in
# ocrd-tool.json.
test: model
	cd repo/assets/data/kant_aufklaerung_1784/data && ocrd-sbb-binarize -I OCR-D-IMG -O BIN -P model $(MODEL_DIR)
	cd repo/assets/data/kant_aufklaerung_1784-page-region/data && ocrd-sbb-binarize -I OCR-D-IMG -O BIN -P model $(MODEL_DIR) -P operation_level region
|
22
README.md
22
README.md
|
@ -1,18 +1,30 @@
|
||||||
# Binarization
|
# Binarization
|
||||||
|
|
||||||
> Binarization for document images
|
> Binarization for document images
|
||||||
|
|
||||||
## Introduction
|
## Introduction
|
||||||
This tool performs document image binarization (i.e. transform colour/grayscale to black-and-white pixels) for OCR using multiple trained models.
|
|
||||||
|
This tool performs document image binarization (i.e. transform colour/grayscale
|
||||||
|
to black-and-white pixels) for OCR using multiple trained models.
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
Clone the repository, enter it and run
|
Clone the repository, enter it and run
|
||||||
`./make`
|
|
||||||
|
`pip install .`
|
||||||
|
|
||||||
### Models
|
### Models
|
||||||
|
|
||||||
Pre-trained models can be downloaded from here:
|
Pre-trained models can be downloaded from here:
|
||||||
|
|
||||||
https://qurator-data.de/sbb_binarization/
|
https://qurator-data.de/sbb_binarization/
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
`sbb_binarize -m <directory with models> -i <image file>
|
|
||||||
-p <set to true to let the model see the image divided into patches>
|
```sh
|
||||||
-s <directory where the results will be saved>`
|
sbb_binarize \
|
||||||
|
-m <directory with models> \
|
||||||
|
-i <image file> \
|
||||||
|
-p <set to true to let the model see the image divided into patches> \
|
||||||
|
-s <directory where the results will be saved>
|
||||||
|
```
|
||||||
|
|
1
ocrd-tool.json
Symbolic link
1
ocrd-tool.json
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
sbb_binarize/ocrd-tool.json
|
1
repo/assets
Submodule
1
repo/assets
Submodule
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit 32fde9eb242c595a1986a193090c689f52eeb734
|
6
requirements.txt
Normal file
6
requirements.txt
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
numpy >= 1.17.0, < 1.19.0
|
||||||
|
setuptools >= 41
|
||||||
|
opencv-python-headless
|
||||||
|
ocrd >= 2.18.0
|
||||||
|
keras >= 2.3.1, < 2.4
|
||||||
|
tensorflow >= 1.15, < 1.16
|
16
sbb_binarize/cli.py
Normal file
16
sbb_binarize/cli.py
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
"""
|
||||||
|
sbb_binarize CLI
|
||||||
|
"""
|
||||||
|
|
||||||
|
from click import command, option, argument, version_option
|
||||||
|
|
||||||
|
from .sbb_binarize import SbbBinarizer
|
||||||
|
|
||||||
|
@command()
@version_option()
@option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.')
@option('--model-dir', '-m', required=True, help='directory containing models for prediction')
@argument('input_image')
@argument('output_image')
def main(patches, model_dir, input_image, output_image):
    # Command-line entry point: binarize INPUT_IMAGE with the models found in
    # --model-dir and write the result to OUTPUT_IMAGE.
    # (Kept as a comment: click would print a docstring in the --help output.)
    binarizer = SbbBinarizer(model_dir)
    binarizer.run(image_path=input_image, use_patches=patches, save=output_image)
|
27
sbb_binarize/ocrd-tool.json
Normal file
27
sbb_binarize/ocrd-tool.json
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
{
|
||||||
|
"version": "0.0.1",
|
||||||
|
"git_url": "https://github.com/qurator-spk/sbb_binarization",
|
||||||
|
"tools": {
|
||||||
|
"ocrd-sbb-binarize": {
|
||||||
|
"executable": "ocrd-sbb-binarize",
|
||||||
|
"description": "Pixelwise binarization with selectional auto-encoders in Keras",
|
||||||
|
"categories": ["Image preprocessing"],
|
||||||
|
"steps": ["preprocessing/optimization/binarization"],
|
||||||
|
"input_file_grp": [],
|
||||||
|
"output_file_grp": [],
|
||||||
|
"parameters": {
|
||||||
|
"operation_level": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["page", "region"],
|
||||||
|
"default": "page",
|
||||||
|
"description": "PAGE XML hierarchy level to operate on"
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"description": "models directory.",
|
||||||
|
"type": "string",
|
||||||
|
"required": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
115
sbb_binarize/ocrd_cli.py
Normal file
115
sbb_binarize/ocrd_cli.py
Normal file
|
@ -0,0 +1,115 @@
|
||||||
|
import os.path
|
||||||
|
from pkg_resources import resource_string
|
||||||
|
from json import loads
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
from click import command
|
||||||
|
|
||||||
|
from ocrd_utils import (
|
||||||
|
getLogger,
|
||||||
|
assert_file_grp_cardinality,
|
||||||
|
make_file_id,
|
||||||
|
MIMETYPE_PAGE
|
||||||
|
)
|
||||||
|
from ocrd import Processor
|
||||||
|
from ocrd_modelfactory import page_from_file
|
||||||
|
from ocrd_models.ocrd_page import AlternativeImageType, to_xml
|
||||||
|
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
|
||||||
|
|
||||||
|
from .sbb_binarize import SbbBinarizer
|
||||||
|
|
||||||
|
OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8'))
|
||||||
|
TOOL = 'ocrd-sbb-binarize'
|
||||||
|
|
||||||
|
def cv2pil(img):
    """Convert an array (as used with OpenCV) into a PIL image after casting it to uint8."""
    as_uint8 = img.astype('uint8')
    return Image.fromarray(as_uint8)
|
||||||
|
|
||||||
|
def pil2cv(img):
    """Convert a PIL image into a BGR array for OpenCV.

    Modes '1' and 'L' are expanded from gray to BGR; every other mode is
    treated as RGB and has its channels swapped.
    """
    # from ocrd/workspace.py
    if img.mode in ('1', 'L'):
        conversion_code = cv2.COLOR_GRAY2BGR
    else:
        conversion_code = cv2.COLOR_RGB2BGR

    pixels = np.array(img)
    if img.mode == '1':
        # bilevel images are cast to uint8 before the colour conversion
        pixels = pixels.astype('uint8')

    return cv2.cvtColor(pixels, conversion_code)
|
||||||
|
|
||||||
|
class SbbBinarizeProcessor(Processor):
    """OCR-D processor that binarizes page, region or line images with SbbBinarizer."""

    def __init__(self, *args, **kwargs):
        # Tool description and version are taken from the bundled ocrd-tool.json.
        kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL]
        kwargs['version'] = OCRD_TOOL['version']
        super().__init__(*args, **kwargs)

    def _binarize_segment(self, binarizer, segment, image, coords, image_file_id, page_id):
        """
        Binarize ``image``, save it to the output file group and reference it
        from ``segment`` as an AlternativeImage.

        Args:
            binarizer: the SbbBinarizer instance to run.
            segment: PAGE element (page, region or line) receiving the AlternativeImage.
            image: PIL image of the segment.
            coords: coordinate dict returned together with ``image``; its
                ``'features'`` entry is extended by ``binarized``.
            image_file_id: file ID for the saved image.
            page_id: page ID the saved image is registered under.
        """
        bin_image = cv2pil(binarizer.run(image=pil2cv(image), use_patches=True))
        # update METS (add the image file):
        bin_image_path = self.workspace.save_image_file(
            bin_image,
            image_file_id,
            page_id=page_id,
            file_grp=self.output_file_grp)
        segment.add_AlternativeImage(
            AlternativeImageType(filename=bin_image_path, comments='%s,binarized' % coords['features']))

    def process(self):
        """
        Binarize with sbb_binarization

        For every input file, the PAGE document is loaded and the page image is
        retrieved. Depending on the ``operation_level`` parameter, the page
        image, each text/table region image or each text line image is
        binarized, saved to the output file group and referenced as an
        AlternativeImage. The updated PAGE document is then added to the
        output file group.
        """
        LOG = getLogger('processor.SbbBinarize')
        assert_file_grp_cardinality(self.input_file_grp, 1)
        assert_file_grp_cardinality(self.output_file_grp, 1)

        oplevel = self.parameter['operation_level']
        binarizer = SbbBinarizer(model_dir=self.parameter['model'])

        for n, input_file in enumerate(self.input_files):
            file_id = make_file_id(input_file, self.output_file_grp)
            page_id = input_file.pageId or input_file.ID
            LOG.info("INPUT FILE %i / %s", n, page_id)
            pcgts = page_from_file(self.workspace.download_file(input_file))
            self.add_metadata(pcgts)
            pcgts.set_pcGtsId(file_id)
            page = pcgts.get_Page()
            # The page image is needed on every level: region and line images
            # are cropped from it, so fetch it before branching.
            page_image, page_xywh, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized')

            if oplevel == 'page':
                LOG.info("Binarizing on 'page' level in page '%s'", page_id)
                self._binarize_segment(
                    binarizer, page, page_image, page_xywh,
                    file_id + '.IMG-BIN', input_file.pageId)

            elif oplevel == 'region':
                regions = page.get_AllRegions(['Text', 'Table'], depth=1)
                if not regions:
                    LOG.warning("Page '%s' contains no text/table regions", page_id)
                for region in regions:
                    region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized')
                    self._binarize_segment(
                        binarizer, region, region_image, region_xywh,
                        "%s_%s.IMG-BIN" % (file_id, region.id), input_file.pageId)

            elif oplevel == 'line':
                # One (region id, line) pair per text line: get_TextLine()
                # yields all lines of a region, so flatten them here.
                region_line_tuples = [
                    (region.id, line)
                    for region in page.get_AllRegions(['Text'], depth=0)
                    for line in region.get_TextLine()]
                if not region_line_tuples:
                    LOG.warning("Page '%s' contains no text lines", page_id)
                for region_id, line in region_line_tuples:
                    line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized')
                    self._binarize_segment(
                        binarizer, line, line_image, line_xywh,
                        "%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id), input_file.pageId)

            self.workspace.add_file(
                ID=file_id,
                file_grp=self.output_file_grp,
                pageId=input_file.pageId,
                mimetype=MIMETYPE_PAGE,
                local_filename=os.path.join(self.output_file_grp, file_id + '.xml'),
                content=to_xml(pcgts))
|
||||||
|
|
||||||
|
@command()
@ocrd_cli_options
def cli(*args, **kwargs):
    # OCR-D command-line entry point: hand all parsed options over to the
    # processor wrapper. (Comment rather than docstring, so the --help text
    # is unchanged.)
    processor_class = SbbBinarizeProcessor
    return ocrd_cli_wrap_processor(processor_class, *args, **kwargs)
|
|
@ -1,67 +1,56 @@
|
||||||
#! /usr/bin/env python3
|
|
||||||
|
|
||||||
__version__= '1.0'
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
import numpy as np
|
|
||||||
import warnings
|
|
||||||
import cv2
|
|
||||||
from keras.models import load_model
|
|
||||||
import tensorflow as tf
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
with warnings.catch_warnings():
|
|
||||||
warnings.simplefilter("ignore")
|
|
||||||
|
|
||||||
__doc__=\
|
|
||||||
"""
|
"""
|
||||||
Tool to load model and binarize a given image.
|
Tool to load model and binarize a given image.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
class sbb_binarize:
|
import sys
|
||||||
def __init__(self,image,model, patches='false',save=None ):
|
from os import listdir, environ, devnull
|
||||||
self.image=image
|
from os.path import join
|
||||||
self.patches=patches
|
from warnings import catch_warnings, simplefilter
|
||||||
self.save=save
|
|
||||||
self.model_dir=model
|
|
||||||
|
|
||||||
def resize_image(self,img_in,input_height,input_width):
|
import numpy as np
|
||||||
return cv2.resize( img_in, ( input_width,input_height) ,interpolation=cv2.INTER_NEAREST)
|
from PIL import Image
|
||||||
|
import cv2
|
||||||
|
environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
||||||
|
stderr = sys.stderr
|
||||||
|
sys.stderr = open(devnull, 'w')
|
||||||
|
from keras.models import load_model
|
||||||
|
sys.stderr = stderr
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
def start_new_session_and_model(self):
|
def resize_image(img_in, input_height, input_width):
    """Resize ``img_in`` to ``input_height`` x ``input_width`` with nearest-neighbour interpolation."""
    # cv2.resize takes the target size as (width, height)
    target_size = (input_width, input_height)
    return cv2.resize(img_in, target_size, interpolation=cv2.INTER_NEAREST)
|
||||||
|
|
||||||
|
class SbbBinarizer:
|
||||||
|
|
||||||
|
def __init__(self, model_dir):
|
||||||
|
self.model_dir = model_dir
|
||||||
|
|
||||||
|
def start_new_session(self):
|
||||||
config = tf.ConfigProto()
|
config = tf.ConfigProto()
|
||||||
config.gpu_options.allow_growth=True
|
config.gpu_options.allow_growth = True
|
||||||
|
|
||||||
self.session =tf.Session(config=config)# tf.InteractiveSession()
|
self.session = tf.Session(config=config) # tf.InteractiveSession()
|
||||||
def load_model(self,model_name):
|
|
||||||
self.model = load_model(self.model_dir+'/'+model_name , compile=False)
|
|
||||||
|
|
||||||
|
|
||||||
self.img_height=self.model.layers[len(self.model.layers)-1].output_shape[1]
|
|
||||||
self.img_width=self.model.layers[len(self.model.layers)-1].output_shape[2]
|
|
||||||
self.n_classes=self.model.layers[len(self.model.layers)-1].output_shape[3]
|
|
||||||
|
|
||||||
def end_session(self):
|
def end_session(self):
|
||||||
self.session.close()
|
self.session.close()
|
||||||
|
|
||||||
|
|
||||||
del self.model
|
|
||||||
del self.session
|
del self.session
|
||||||
def predict(self,model_name):
|
|
||||||
self.load_model(model_name)
|
|
||||||
img=cv2.imread(self.image)
|
|
||||||
img_width_model=self.img_width
|
|
||||||
img_height_model=self.img_height
|
|
||||||
|
|
||||||
if self.patches=='true' or self.patches=='True':
|
def load_model(self, model_name):
    """
    Load the model file ``model_name`` from ``self.model_dir`` without compiling it.

    Returns:
        Tuple ``(model, model_height, model_width, n_classes)``; the three
        numbers are entries 1, 2 and 3 of the last layer's ``output_shape``.
    """
    model_path = join(self.model_dir, model_name)
    model = load_model(model_path, compile=False)
    final_shape = model.layers[-1].output_shape
    model_height = final_shape[1]
    model_width = final_shape[2]
    n_classes = final_shape[3]
    return model, model_height, model_width, n_classes
|
||||||
|
|
||||||
margin = int(0.1 * img_width_model)
|
def predict(self, model_name, img, use_patches):
|
||||||
|
model, model_height, model_width, n_classes = self.load_model(model_name)
|
||||||
|
|
||||||
width_mid = img_width_model - 2 * margin
|
if use_patches:
|
||||||
height_mid = img_height_model - 2 * margin
|
|
||||||
|
margin = int(0.1 * model_width)
|
||||||
|
|
||||||
|
width_mid = model_width - 2 * margin
|
||||||
|
height_mid = model_height - 2 * margin
|
||||||
|
|
||||||
|
|
||||||
img = img / float(255.0)
|
img = img / float(255.0)
|
||||||
|
@ -89,167 +78,140 @@ class sbb_binarize:
|
||||||
|
|
||||||
if i == 0:
|
if i == 0:
|
||||||
index_x_d = i * width_mid
|
index_x_d = i * width_mid
|
||||||
index_x_u = index_x_d + img_width_model
|
index_x_u = index_x_d + model_width
|
||||||
elif i > 0:
|
elif i > 0:
|
||||||
index_x_d = i * width_mid
|
index_x_d = i * width_mid
|
||||||
index_x_u = index_x_d + img_width_model
|
index_x_u = index_x_d + model_width
|
||||||
|
|
||||||
if j == 0:
|
if j == 0:
|
||||||
index_y_d = j * height_mid
|
index_y_d = j * height_mid
|
||||||
index_y_u = index_y_d + img_height_model
|
index_y_u = index_y_d + model_height
|
||||||
elif j > 0:
|
elif j > 0:
|
||||||
index_y_d = j * height_mid
|
index_y_d = j * height_mid
|
||||||
index_y_u = index_y_d + img_height_model
|
index_y_u = index_y_d + model_height
|
||||||
|
|
||||||
if index_x_u > img_w:
|
if index_x_u > img_w:
|
||||||
index_x_u = img_w
|
index_x_u = img_w
|
||||||
index_x_d = img_w - img_width_model
|
index_x_d = img_w - model_width
|
||||||
if index_y_u > img_h:
|
if index_y_u > img_h:
|
||||||
index_y_u = img_h
|
index_y_u = img_h
|
||||||
index_y_d = img_h - img_height_model
|
index_y_d = img_h - model_height
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
|
img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
|
||||||
|
|
||||||
label_p_pred = self.model.predict(
|
label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))
|
||||||
img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))
|
|
||||||
|
|
||||||
seg = np.argmax(label_p_pred, axis=3)[0]
|
seg = np.argmax(label_p_pred, axis=3)[0]
|
||||||
|
|
||||||
seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
|
seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
|
||||||
|
|
||||||
if i==0 and j==0:
|
if i == 0 and j == 0:
|
||||||
seg_color = seg_color[0:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :]
|
seg_color = seg_color[0:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :]
|
||||||
seg = seg[0:seg.shape[0] - margin, 0:seg.shape[1] - margin]
|
seg = seg[0:seg.shape[0] - margin, 0:seg.shape[1] - margin]
|
||||||
|
|
||||||
mask_true[index_y_d + 0:index_y_u - margin, index_x_d + 0:index_x_u - margin] = seg
|
mask_true[index_y_d + 0:index_y_u - margin, index_x_d + 0:index_x_u - margin] = seg
|
||||||
prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + 0:index_x_u - margin,
|
prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + 0:index_x_u - margin, :] = seg_color
|
||||||
:] = seg_color
|
|
||||||
|
|
||||||
elif i==nxf-1 and j==nyf-1:
|
elif i == nxf-1 and j == nyf-1:
|
||||||
seg_color = seg_color[margin:seg_color.shape[0] - 0, margin:seg_color.shape[1] - 0, :]
|
seg_color = seg_color[margin:seg_color.shape[0] - 0, margin:seg_color.shape[1] - 0, :]
|
||||||
seg = seg[margin:seg.shape[0] - 0, margin:seg.shape[1] - 0]
|
seg = seg[margin:seg.shape[0] - 0, margin:seg.shape[1] - 0]
|
||||||
|
|
||||||
mask_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - 0] = seg
|
mask_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - 0] = seg
|
||||||
prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - 0,
|
prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - 0, :] = seg_color
|
||||||
:] = seg_color
|
|
||||||
|
|
||||||
elif i==0 and j==nyf-1:
|
elif i == 0 and j == nyf-1:
|
||||||
seg_color = seg_color[margin:seg_color.shape[0] - 0, 0:seg_color.shape[1] - margin, :]
|
seg_color = seg_color[margin:seg_color.shape[0] - 0, 0:seg_color.shape[1] - margin, :]
|
||||||
seg = seg[margin:seg.shape[0] - 0, 0:seg.shape[1] - margin]
|
seg = seg[margin:seg.shape[0] - 0, 0:seg.shape[1] - margin]
|
||||||
|
|
||||||
mask_true[index_y_d + margin:index_y_u - 0, index_x_d + 0:index_x_u - margin] = seg
|
mask_true[index_y_d + margin:index_y_u - 0, index_x_d + 0:index_x_u - margin] = seg
|
||||||
prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + 0:index_x_u - margin,
|
prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + 0:index_x_u - margin, :] = seg_color
|
||||||
:] = seg_color
|
|
||||||
|
|
||||||
elif i==nxf-1 and j==0:
|
elif i == nxf-1 and j == 0:
|
||||||
seg_color = seg_color[0:seg_color.shape[0] - margin, margin:seg_color.shape[1] - 0, :]
|
seg_color = seg_color[0:seg_color.shape[0] - margin, margin:seg_color.shape[1] - 0, :]
|
||||||
seg = seg[0:seg.shape[0] - margin, margin:seg.shape[1] - 0]
|
seg = seg[0:seg.shape[0] - margin, margin:seg.shape[1] - 0]
|
||||||
|
|
||||||
mask_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - 0] = seg
|
mask_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - 0] = seg
|
||||||
prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - 0,
|
prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - 0, :] = seg_color
|
||||||
:] = seg_color
|
|
||||||
|
|
||||||
elif i==0 and j!=0 and j!=nyf-1:
|
elif i == 0 and j != 0 and j != nyf-1:
|
||||||
seg_color = seg_color[margin:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :]
|
seg_color = seg_color[margin:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :]
|
||||||
seg = seg[margin:seg.shape[0] - margin, 0:seg.shape[1] - margin]
|
seg = seg[margin:seg.shape[0] - margin, 0:seg.shape[1] - margin]
|
||||||
|
|
||||||
mask_true[index_y_d + margin:index_y_u - margin, index_x_d + 0:index_x_u - margin] = seg
|
mask_true[index_y_d + margin:index_y_u - margin, index_x_d + 0:index_x_u - margin] = seg
|
||||||
prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + 0:index_x_u - margin,
|
prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + 0:index_x_u - margin, :] = seg_color
|
||||||
:] = seg_color
|
|
||||||
|
|
||||||
elif i==nxf-1 and j!=0 and j!=nyf-1:
|
elif i == nxf-1 and j != 0 and j != nyf-1:
|
||||||
seg_color = seg_color[margin:seg_color.shape[0] - margin, margin:seg_color.shape[1] - 0, :]
|
seg_color = seg_color[margin:seg_color.shape[0] - margin, margin:seg_color.shape[1] - 0, :]
|
||||||
seg = seg[margin:seg.shape[0] - margin, margin:seg.shape[1] - 0]
|
seg = seg[margin:seg.shape[0] - margin, margin:seg.shape[1] - 0]
|
||||||
|
|
||||||
mask_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - 0] = seg
|
mask_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - 0] = seg
|
||||||
prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - 0,
|
prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - 0, :] = seg_color
|
||||||
:] = seg_color
|
|
||||||
|
|
||||||
elif i!=0 and i!=nxf-1 and j==0:
|
elif i != 0 and i != nxf-1 and j == 0:
|
||||||
seg_color = seg_color[0:seg_color.shape[0] - margin, margin:seg_color.shape[1] - margin, :]
|
seg_color = seg_color[0:seg_color.shape[0] - margin, margin:seg_color.shape[1] - margin, :]
|
||||||
seg = seg[0:seg.shape[0] - margin, margin:seg.shape[1] - margin]
|
seg = seg[0:seg.shape[0] - margin, margin:seg.shape[1] - margin]
|
||||||
|
|
||||||
mask_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - margin] = seg
|
mask_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - margin] = seg
|
||||||
prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - margin,
|
prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color
|
||||||
:] = seg_color
|
|
||||||
|
|
||||||
elif i!=0 and i!=nxf-1 and j==nyf-1:
|
elif i != 0 and i != nxf-1 and j == nyf-1:
|
||||||
seg_color = seg_color[margin:seg_color.shape[0] - 0, margin:seg_color.shape[1] - margin, :]
|
seg_color = seg_color[margin:seg_color.shape[0] - 0, margin:seg_color.shape[1] - margin, :]
|
||||||
seg = seg[margin:seg.shape[0] - 0, margin:seg.shape[1] - margin]
|
seg = seg[margin:seg.shape[0] - 0, margin:seg.shape[1] - margin]
|
||||||
|
|
||||||
mask_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - margin] = seg
|
mask_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - margin] = seg
|
||||||
prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - margin,
|
prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - margin, :] = seg_color
|
||||||
:] = seg_color
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
seg_color = seg_color[margin:seg_color.shape[0] - margin, margin:seg_color.shape[1] - margin, :]
|
seg_color = seg_color[margin:seg_color.shape[0] - margin, margin:seg_color.shape[1] - margin, :]
|
||||||
seg = seg[margin:seg.shape[0] - margin, margin:seg.shape[1] - margin]
|
seg = seg[margin:seg.shape[0] - margin, margin:seg.shape[1] - margin]
|
||||||
|
|
||||||
mask_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin] = seg
|
mask_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin] = seg
|
||||||
prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin,
|
prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color
|
||||||
:] = seg_color
|
|
||||||
|
|
||||||
prediction_true = prediction_true.astype(np.uint8)
|
prediction_true = prediction_true.astype(np.uint8)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
img_h_page=img.shape[0]
|
img_h_page = img.shape[0]
|
||||||
img_w_page=img.shape[1]
|
img_w_page = img.shape[1]
|
||||||
img = img /float( 255.0)
|
img = img / float(255.0)
|
||||||
img = self.resize_image(img, img_height_model, img_width_model)
|
img = resize_image(img, model_height, model_width)
|
||||||
|
|
||||||
label_p_pred = self.model.predict(
|
label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]))
|
||||||
img.reshape(1, img.shape[0], img.shape[1], img.shape[2]))
|
|
||||||
|
|
||||||
seg = np.argmax(label_p_pred, axis=3)[0]
|
seg = np.argmax(label_p_pred, axis=3)[0]
|
||||||
seg_color =np.repeat(seg[:, :, np.newaxis], 3, axis=2)
|
seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
|
||||||
prediction_true = self.resize_image(seg_color, img_h_page, img_w_page)
|
prediction_true = resize_image(seg_color, img_h_page, img_w_page)
|
||||||
prediction_true = prediction_true.astype(np.uint8)
|
prediction_true = prediction_true.astype(np.uint8)
|
||||||
return prediction_true[:,:,0]
|
return prediction_true[:,:,0]
|
||||||
|
|
||||||
def run(self, image=None, image_path=None, save=None, use_patches=False):
    """
    Binarize one image with every model file found in ``self.model_dir``.

    Exactly one of ``image`` and ``image_path`` must be passed.

    Args:
        image: image array handed to ``self.predict`` (e.g. the result of ``cv2.imread``).
        image_path: path of an image file, loaded with ``cv2.imread``.
        save: if truthy, path the result is written to with ``cv2.imwrite``.
        use_patches: forwarded to ``self.predict``.

    Returns:
        The combined result array: 0 where at least one model predicted
        label 1, 255 everywhere else.

    Raises:
        ValueError: if both or neither of ``image`` and ``image_path`` are passed.
    """
    if (image is None) == (image_path is None):
        raise ValueError("Must pass either a opencv2 image or an image_path")
    if image_path is not None:
        # ``image`` is None in this branch, so the file has to be read from the path.
        image = cv2.imread(image_path)
    self.start_new_session()

    img_last = 0
    for model_in in listdir(self.model_dir):
        res = self.predict(model_in, image, use_patches)
        # Remap the labels so that pixels predicted as 1 end up as 0 ...
        res[:, :][res[:, :] == 0] = 2
        res = res - 1
        res = res * 255
        # ... and count them (as 255) towards the sum over all models.
        img_last = img_last + (res[:, :] == 0) * 255

    # Any model voting for a pixel is enough; then invert the mask.
    img_last[:, :][img_last[:, :] > 0] = 255
    img_last = (img_last[:, :] == 0) * 255
    if save:
        cv2.imwrite(save, img_last)
    return img_last
|
||||||
|
|
30
setup.py
30
setup.py
|
@ -1,6 +1,30 @@
|
||||||
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Packaging script: version from ocrd-tool.json, dependencies from requirements.txt."""
from json import load
from setuptools import setup, find_packages

with open('./ocrd-tool.json', 'r') as f:
    version = load(f)['version']

# One requirement per non-empty line; the files are closed again after reading.
with open('requirements.txt') as f:
    install_requires = [line.strip() for line in f.read().split('\n') if line.strip()]

with open('README.md') as f:
    long_description = f.read()

setup(
    name='sbb_binarization',
    version=version,
    description='Pixelwise binarization with selectional auto-encoders in Keras',
    long_description=long_description,
    long_description_content_type='text/markdown',
    author='Vahid Rezanezhad',
    url='https://github.com/qurator-spk/sbb_binarization',
    license='Apache License 2.0',
    packages=find_packages(exclude=('tests', 'docs')),
    include_package_data=True,
    package_data={'': ['*.json', '*.yml', '*.yaml']},
    install_requires=install_requires,
    entry_points={
        'console_scripts': [
            'sbb_binarize=sbb_binarize.cli:main',
            'ocrd-sbb-binarize=sbb_binarize.ocrd_cli:cli',
        ]
    },
)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue