From 47e8ddae5a4ea3a39c06f68072ffaadb3df558fb Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Oct 2020 14:44:41 +0200 Subject: [PATCH 01/27] :art: clean up README, create proper Makefile --- Makefile | 4 ++++ README.md | 22 +++++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f92ba15 --- /dev/null +++ b/Makefile @@ -0,0 +1,4 @@ +all: install + +install: + pip install . diff --git a/README.md b/README.md index be80345..dfb4d5c 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,30 @@ # Binarization + > Binarization for document images ## Introduction -This tool performs document image binarization (i.e. transform colour/grayscale to black-and-white pixels) for OCR using multiple trained models. + +This tool performs document image binarization (i.e. transform colour/grayscale +to black-and-white pixels) for OCR using multiple trained models. ## Installation + Clone the repository, enter it and run -`./make` + +`pip install .` ### Models + Pre-trained models can be downloaded from here: + https://qurator-data.de/sbb_binarization/ ## Usage -`sbb_binarize -m -i --p --s ` + +```sh +sbb_binarize \ + -m \ + -i \ + -p \ + -s ` +``` From b8310a4240be60ba1233fc5c475780bf85edf30f Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Oct 2020 15:18:57 +0200 Subject: [PATCH 02/27] setup.py/requirements.txt/gitignore --- .gitignore | 2 ++ requirements.txt | 4 ++++ setup.py | 28 +++++++++++++++++++++++++--- 3 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 .gitignore create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c14b1f1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.egg-info +__pycache__ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..39fbd11 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +opencv-python-headless 
+numpy +keras >= 2.3.1, < 2.4 +tensorflow >= 1.15, < 1.16 diff --git a/setup.py b/setup.py index ac55505..3796c51 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,28 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- +from json import load +from setuptools import setup, find_packages -import setuptools -from numpy.distutils.core import Extension, setup +with open('./ocrd-tool.json', 'r') as f: + version = load(f)['version'] -setup(name='sbb_binarize',version=1.0,packages=['sbb_binarize']) +install_requires = open('requirements.txt').read().split('\n') + +setup( + name='sbb_binarization', + version=version, + description='Binarization with ', + long_description=open('README.md').read(), + long_description_content_type='text/markdown', + author='Vahid Rezanezhad', + url='https://github.com/qurator-spk/sbb_binarization', + license='Apache License 2.0', + packages=find_packages(exclude=('tests', 'docs')), + include_package_data=True, + install_requires=install_requires, + entry_points={ + 'console_scripts': [ + 'sbb_binarize=sbb_binarize.sbb_binarize:main', + ] + }, +) From 150f03154ff139a528c9917fe0ccfa247c0c9b86 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Oct 2020 15:19:04 +0200 Subject: [PATCH 03/27] add ocrd-tool.json --- ocrd-tool.json | 1 + sbb_binarize/ocrd-tool.json | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 120000 ocrd-tool.json create mode 100644 sbb_binarize/ocrd-tool.json diff --git a/ocrd-tool.json b/ocrd-tool.json new file mode 120000 index 0000000..3c8dc95 --- /dev/null +++ b/ocrd-tool.json @@ -0,0 +1 @@ +sbb_binarize/ocrd-tool.json \ No newline at end of file diff --git a/sbb_binarize/ocrd-tool.json b/sbb_binarize/ocrd-tool.json new file mode 100644 index 0000000..70eab2a --- /dev/null +++ b/sbb_binarize/ocrd-tool.json @@ -0,0 +1,26 @@ +{ + "version": "0.0.1", + "git_url": "https://github.com/qurator-spk/sbb_binarization", + "tools": { + "ocrd-sbb-binarize": { + "executable": "ocrd-sbb-binarize", + 
"description": "Smart binarization with sbb_binarization", + "categories": ["Image preprocessing"], + "steps": ["preprocessing/optimization/binarization"], + "input_file_grp": [], + "output_file_grp": [], + "parameters": { + "patches": { + "description": "by setting this parameter to true you let the model to see the image in patches.", + "type": "boolean", + "default": false + }, + "model": { + "description": "models directory.", + "format": "string", + "required": true + } + } + } + } +} From 71d44408b3a1bafc0d1f9bc6cd713d873276234d Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Oct 2020 15:33:30 +0200 Subject: [PATCH 04/27] :art: clean up code --- sbb_binarize/sbb_binarize.py | 213 ++++++++++++++++------------------- 1 file changed, 98 insertions(+), 115 deletions(-) diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py index f701a1c..7faf1be 100644 --- a/sbb_binarize/sbb_binarize.py +++ b/sbb_binarize/sbb_binarize.py @@ -1,60 +1,57 @@ -#! /usr/bin/env python3 +""" +Tool to load model and binarize a given image. +""" -__version__= '1.0' +from argparse import ArgumentParser +from os import listdir +from os.path import join +from warnings import catch_warnings, simplefilter -import argparse -import sys -import os import numpy as np -import warnings import cv2 from keras.models import load_model import tensorflow as tf - - -with warnings.catch_warnings(): - warnings.simplefilter("ignore") - -__doc__=\ -""" -Tool to load model and binarize a given image. 
-""" +# XXX better to set env var before tensorflow import to suppress those specific warnings +with catch_warnings(): + simplefilter("ignore") class sbb_binarize: - def __init__(self,image,model, patches='false',save=None ): - self.image=image - self.patches=patches - self.save=save - self.model_dir=model + + # TODO use True/False for patches + def __init__(self, image, model, patches='false', save=None): + self.image = image + self.patches = patches + self.save = save + self.model_dir = model def resize_image(self,img_in,input_height,input_width): - return cv2.resize( img_in, ( input_width,input_height) ,interpolation=cv2.INTER_NEAREST) - + return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) + def start_new_session_and_model(self): config = tf.ConfigProto() - config.gpu_options.allow_growth=True - - self.session =tf.Session(config=config)# tf.InteractiveSession() + config.gpu_options.allow_growth = True + + self.session = tf.Session(config=config) # tf.InteractiveSession() + def load_model(self,model_name): - self.model = load_model(self.model_dir+'/'+model_name , compile=False) - - - self.img_height=self.model.layers[len(self.model.layers)-1].output_shape[1] - self.img_width=self.model.layers[len(self.model.layers)-1].output_shape[2] - self.n_classes=self.model.layers[len(self.model.layers)-1].output_shape[3] - def end_session(self): - self.session.close() + self.model = load_model(join(self.model_dir, model_name), compile=False) + self.img_height = self.model.layers[len(self.model.layers)-1].output_shape[1] + self.img_width = self.model.layers[len(self.model.layers)-1].output_shape[2] + self.n_classes = self.model.layers[len(self.model.layers)-1].output_shape[3] + def end_session(self): + self.session.close() del self.model del self.session + def predict(self,model_name): self.load_model(model_name) - img=cv2.imread(self.image) - img_width_model=self.img_width - img_height_model=self.img_height + img = cv2.imread(self.image) 
+ img_width_model = self.img_width + img_height_model = self.img_height if self.patches=='true' or self.patches=='True': @@ -107,149 +104,135 @@ class sbb_binarize: if index_y_u > img_h: index_y_u = img_h index_y_d = img_h - img_height_model - - img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = self.model.predict( - img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) + label_p_pred = self.model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) seg = np.argmax(label_p_pred, axis=3)[0] seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - if i==0 and j==0: + if i == 0 and j == 0: seg_color = seg_color[0:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :] seg = seg[0:seg.shape[0] - margin, 0:seg.shape[1] - margin] mask_true[index_y_d + 0:index_y_u - margin, index_x_d + 0:index_x_u - margin] = seg - prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + 0:index_x_u - margin, - :] = seg_color - - elif i==nxf-1 and j==nyf-1: + prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + 0:index_x_u - margin, :] = seg_color + + elif i == nxf-1 and j == nyf-1: seg_color = seg_color[margin:seg_color.shape[0] - 0, margin:seg_color.shape[1] - 0, :] seg = seg[margin:seg.shape[0] - 0, margin:seg.shape[1] - 0] mask_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - 0] = seg - prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - 0, - :] = seg_color - - elif i==0 and j==nyf-1: + prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - 0, :] = seg_color + + elif i == 0 and j == nyf-1: seg_color = seg_color[margin:seg_color.shape[0] - 0, 0:seg_color.shape[1] - margin, :] seg = seg[margin:seg.shape[0] - 0, 0:seg.shape[1] - margin] mask_true[index_y_d + margin:index_y_u - 0, index_x_d + 0:index_x_u - margin] = seg - prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + 0:index_x_u - 
margin, - :] = seg_color - - elif i==nxf-1 and j==0: + prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + 0:index_x_u - margin, :] = seg_color + + elif i == nxf-1 and j == 0: seg_color = seg_color[0:seg_color.shape[0] - margin, margin:seg_color.shape[1] - 0, :] seg = seg[0:seg.shape[0] - margin, margin:seg.shape[1] - 0] mask_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - 0] = seg - prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - 0, - :] = seg_color - - elif i==0 and j!=0 and j!=nyf-1: + prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - 0, :] = seg_color + + elif i == 0 and j != 0 and j != nyf-1: seg_color = seg_color[margin:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :] seg = seg[margin:seg.shape[0] - margin, 0:seg.shape[1] - margin] mask_true[index_y_d + margin:index_y_u - margin, index_x_d + 0:index_x_u - margin] = seg - prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + 0:index_x_u - margin, - :] = seg_color - - elif i==nxf-1 and j!=0 and j!=nyf-1: + prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + 0:index_x_u - margin, :] = seg_color + + elif i == nxf-1 and j != 0 and j != nyf-1: seg_color = seg_color[margin:seg_color.shape[0] - margin, margin:seg_color.shape[1] - 0, :] seg = seg[margin:seg.shape[0] - margin, margin:seg.shape[1] - 0] mask_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - 0] = seg - prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - 0, - :] = seg_color - - elif i!=0 and i!=nxf-1 and j==0: + prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - 0, :] = seg_color + + elif i != 0 and i != nxf-1 and j == 0: seg_color = seg_color[0:seg_color.shape[0] - margin, margin:seg_color.shape[1] - margin, :] seg = seg[0:seg.shape[0] - margin, margin:seg.shape[1] - margin] mask_true[index_y_d + 0:index_y_u - margin, index_x_d 
+ margin:index_x_u - margin] = seg - prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - margin, - :] = seg_color - - elif i!=0 and i!=nxf-1 and j==nyf-1: + prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color + + elif i != 0 and i != nxf-1 and j == nyf-1: seg_color = seg_color[margin:seg_color.shape[0] - 0, margin:seg_color.shape[1] - margin, :] seg = seg[margin:seg.shape[0] - 0, margin:seg.shape[1] - margin] mask_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - margin] = seg - prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - margin, - :] = seg_color + prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - margin, :] = seg_color else: seg_color = seg_color[margin:seg_color.shape[0] - margin, margin:seg_color.shape[1] - margin, :] seg = seg[margin:seg.shape[0] - margin, margin:seg.shape[1] - margin] mask_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin] = seg - prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin, - :] = seg_color + prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color prediction_true = prediction_true.astype(np.uint8) - + else: - img_h_page=img.shape[0] - img_w_page=img.shape[1] - img = img /float( 255.0) + img_h_page = img.shape[0] + img_w_page = img.shape[1] + img = img / float(255.0) img = self.resize_image(img, img_height_model, img_width_model) label_p_pred = self.model.predict( img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) seg = np.argmax(label_p_pred, axis=3)[0] - seg_color =np.repeat(seg[:, :, np.newaxis], 3, axis=2) + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = self.resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) return prediction_true[:,:,0] def run(self): 
self.start_new_session_and_model() - models_n=os.listdir(self.model_dir) - img_last=0 + models_n = listdir(self.model_dir) + img_last = 0 for model_in in models_n: - - res=self.predict(model_in) - - img_fin=np.zeros((res.shape[0],res.shape[1],3) ) - res[:,:][res[:,:]==0]=2 - res=res-1 - res=res*255 - img_fin[:,:,0]=res - img_fin[:,:,1]=res - img_fin[:,:,2]=res - - img_fin=img_fin.astype(np.uint8) - img_fin=(res[:,:]==0)*255 - img_last=img_last+img_fin - kernel = np.ones((5,5),np.uint8) - img_last[:,:][img_last[:,:]>0]=255 - img_last=(img_last[:,:]==0)*255 - if self.save is not None: - cv2.imwrite(self.save,img_last) + + res = self.predict(model_in) + + img_fin = np.zeros((res.shape[0], res.shape[1], 3)) + res[:, :][res[:, :] == 0] = 2 + res = res-1 + res = res*255 + img_fin[:, :, 0] = res + img_fin[:, :, 1] = res + img_fin[:, :, 2] = res + + img_fin = img_fin.astype(np.uint8) + img_fin = (res[:, :] == 0)*255 + img_last = img_last+img_fin + + kernel = np.ones((5, 5), np.uint8) + img_last[:, :][img_last[:, :] > 0] = 255 + img_last = (img_last[:, :] == 0)*255 + if self.save: + cv2.imwrite(self.save, img_last) + def main(): - parser=argparse.ArgumentParser() - - parser.add_argument('-i','--image', dest='inp1', default=None, help='image.') - parser.add_argument('-p','--patches', dest='inp3', default=False, help='by setting this parameter to true you let the model to see the image in patches.') - parser.add_argument('-s','--save', dest='inp4', default=False, help='save prediction with a given name here. 
The name and format should be given (outputname.tif).') - parser.add_argument('-m','--model', dest='inp2', default=None, help='models directory.') - - options=parser.parse_args() - - possibles=globals() + parser = ArgumentParser() + + parser.add_argument('-i', '--image', dest='inp1', default=None, help='image.') + parser.add_argument('-p', '--patches', dest='inp3', default=False, help='by setting this parameter to true you let the model to see the image in patches.') + parser.add_argument('-s', '--save', dest='inp4', default=False, help='save prediction with a given name here. The name and format should be given (outputname.tif).') + parser.add_argument('-m', '--model', dest='inp2', default=None, help='models directory.') + + options = parser.parse_args() + + possibles = globals() possibles.update(locals()) - x=sbb_binarize(options.inp1,options.inp2,options.inp3,options.inp4) + x = sbb_binarize(options.inp1, options.inp2, options.inp3, options.inp4) x.run() -if __name__=="__main__": +if __name__ == "__main__": main() - - - - From 389ef088d09449cdea2843d1e548a1a27b082388 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Oct 2020 15:38:49 +0200 Subject: [PATCH 05/27] put CLI into its own module --- sbb_binarize/cli.py | 23 +++++++++++++++++++++++ sbb_binarize/sbb_binarize.py | 21 +-------------------- setup.py | 2 +- 3 files changed, 25 insertions(+), 21 deletions(-) create mode 100644 sbb_binarize/cli.py diff --git a/sbb_binarize/cli.py b/sbb_binarize/cli.py new file mode 100644 index 0000000..3de8820 --- /dev/null +++ b/sbb_binarize/cli.py @@ -0,0 +1,23 @@ +""" +sbb_binarize CLI +""" + +from argparse import ArgumentParser + +from .sbb_binarize import SbbBinarizer + +def main(): + parser = ArgumentParser() + + parser.add_argument('-i', '--image', default=None, help='image.') + parser.add_argument('-p', '--patches', default=False, help='by setting this parameter to true you let the model to see the image in patches.') + parser.add_argument('-s', 
'--save', default=False, help='save prediction with a given name here. The name and format should be given (outputname.tif).') + parser.add_argument('-m', '--model', default=None, help='models directory.') + + options = parser.parse_args() + + binarizer = SbbBinarizer(options.image, options.model, options.patches, options.save) + binarizer.run() + +if __name__ == "__main__": + main() diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py index 7faf1be..6c8f8fd 100644 --- a/sbb_binarize/sbb_binarize.py +++ b/sbb_binarize/sbb_binarize.py @@ -2,7 +2,6 @@ Tool to load model and binarize a given image. """ -from argparse import ArgumentParser from os import listdir from os.path import join from warnings import catch_warnings, simplefilter @@ -16,7 +15,7 @@ import tensorflow as tf with catch_warnings(): simplefilter("ignore") -class sbb_binarize: +class SbbBinarizer: # TODO use True/False for patches def __init__(self, image, model, patches='false', save=None): @@ -218,21 +217,3 @@ class sbb_binarize: img_last = (img_last[:, :] == 0)*255 if self.save: cv2.imwrite(self.save, img_last) - -def main(): - parser = ArgumentParser() - - parser.add_argument('-i', '--image', dest='inp1', default=None, help='image.') - parser.add_argument('-p', '--patches', dest='inp3', default=False, help='by setting this parameter to true you let the model to see the image in patches.') - parser.add_argument('-s', '--save', dest='inp4', default=False, help='save prediction with a given name here. 
The name and format should be given (outputname.tif).') - parser.add_argument('-m', '--model', dest='inp2', default=None, help='models directory.') - - options = parser.parse_args() - - possibles = globals() - possibles.update(locals()) - x = sbb_binarize(options.inp1, options.inp2, options.inp3, options.inp4) - x.run() - -if __name__ == "__main__": - main() diff --git a/setup.py b/setup.py index 3796c51..980dd2f 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ setup( install_requires=install_requires, entry_points={ 'console_scripts': [ - 'sbb_binarize=sbb_binarize.sbb_binarize:main', + 'sbb_binarize=sbb_binarize.cli:main', ] }, ) From ca03844c2b416897b9cd8fb5545f6afc00a7455b Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Oct 2020 16:29:55 +0200 Subject: [PATCH 06/27] allow passing image directly, return image on binarize --- sbb_binarize/cli.py | 7 ++++++- sbb_binarize/sbb_binarize.py | 26 ++++++++++++++++---------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/sbb_binarize/cli.py b/sbb_binarize/cli.py index 3de8820..20881b5 100644 --- a/sbb_binarize/cli.py +++ b/sbb_binarize/cli.py @@ -16,7 +16,12 @@ def main(): options = parser.parse_args() - binarizer = SbbBinarizer(options.image, options.model, options.patches, options.save) + binarizer = SbbBinarizer( + image_path=options.image, + model=options.model, + patches=options.patches, + save=options.save + ) binarizer.run() if __name__ == "__main__": diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py index 6c8f8fd..70a81cf 100644 --- a/sbb_binarize/sbb_binarize.py +++ b/sbb_binarize/sbb_binarize.py @@ -15,25 +15,30 @@ import tensorflow as tf with catch_warnings(): simplefilter("ignore") +def resize_image(img_in, input_height, input_width): + return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) + class SbbBinarizer: # TODO use True/False for patches - def __init__(self, image, model, patches='false', save=None): - 
self.image = image + def __init__(self, model, image=None, image_path=None, patches='false', save=None): + if not(image or image_path) or (image and image_path): + raise ValueError("Must pass either a PIL image or an image_path") + if image: + self.image = image + else: + self.image = cv2.imread(self.image) self.patches = patches self.save = save self.model_dir = model - def resize_image(self,img_in,input_height,input_width): - return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) - def start_new_session_and_model(self): config = tf.ConfigProto() config.gpu_options.allow_growth = True self.session = tf.Session(config=config) # tf.InteractiveSession() - def load_model(self,model_name): + def load_model(self, model_name): self.model = load_model(join(self.model_dir, model_name), compile=False) @@ -48,11 +53,11 @@ class SbbBinarizer: def predict(self,model_name): self.load_model(model_name) - img = cv2.imread(self.image) + img = self.image img_width_model = self.img_width img_height_model = self.img_height - if self.patches=='true' or self.patches=='True': + if self.patches in ('true', 'True'): margin = int(0.1 * img_width_model) @@ -181,14 +186,14 @@ class SbbBinarizer: img_h_page = img.shape[0] img_w_page = img.shape[1] img = img / float(255.0) - img = self.resize_image(img, img_height_model, img_width_model) + img = resize_image(img, img_height_model, img_width_model) label_p_pred = self.model.predict( img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) seg = np.argmax(label_p_pred, axis=3)[0] seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - prediction_true = self.resize_image(seg_color, img_h_page, img_w_page) + prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) return prediction_true[:,:,0] @@ -217,3 +222,4 @@ class SbbBinarizer: img_last = (img_last[:, :] == 0)*255 if self.save: cv2.imwrite(self.save, img_last) + return img_last From 
5909f94fab77c9c80ba19b25478d8d42a3ec26f4 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Oct 2020 16:39:38 +0200 Subject: [PATCH 07/27] initial OCR-D interface --- sbb_binarize/ocrd-tool.json | 8 +++- sbb_binarize/ocrd_cli.py | 75 +++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 sbb_binarize/ocrd_cli.py diff --git a/sbb_binarize/ocrd-tool.json b/sbb_binarize/ocrd-tool.json index 70eab2a..dafc09a 100644 --- a/sbb_binarize/ocrd-tool.json +++ b/sbb_binarize/ocrd-tool.json @@ -10,6 +10,12 @@ "input_file_grp": [], "output_file_grp": [], "parameters": { + "operation_level": { + "type": "string", + "enum": ["page", "region", "line"], + "default": "page", + "description": "PAGE XML hierarchy level to operate on (currently only page supported" + }, "patches": { "description": "by setting this parameter to true you let the model to see the image in patches.", "type": "boolean", @@ -17,7 +23,7 @@ }, "model": { "description": "models directory.", - "format": "string", + "type": "string", "required": true } } diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py new file mode 100644 index 0000000..7cb8022 --- /dev/null +++ b/sbb_binarize/ocrd_cli.py @@ -0,0 +1,75 @@ +import os.path +from pkg_resources import resource_string +from json import loads + +from ocrd_utils import ( + getLogger, + assert_file_grp_cardinality, + make_file_id, + MIMETYPE_PAGE +) +from ocrd_modelfactory import page_from_file +from ocrd_models.ocrd_page import ( + MetadataItemType, + LabelsType, LabelType, + AlternativeImageType, + TextRegionType, + to_xml +) +from ocrd import Processor + +from .sbb_binarize import SbbBinarizer + +OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8')) +TOOL = 'ocrd-sbb-binarize' + +class SbbBinarizeProcessor(Processor): + + def __init__(self, *args, **kwargs): + kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL] + kwargs['version'] = OCRD_TOOL['version'] + 
super().__init__(*args, **kwargs) + + def process(self): + """ + Binarize with sbb_binarization + """ + LOG = getLogger('processor.SbbBinarize') + assert_file_grp_cardinality(self.input_file_grp, 1) + assert_file_grp_cardinality(self.output_file_grp, 1) + + oplevel = self.parameter['operation_level'] + use_patches = self.parameter['patches'] + model_path = self.parameter['model'] + + for n, input_file in enumerate(self.input_files): + file_id = make_file_id(input_file, self.output_file_grp) + page_id = input_file.pageId or input_file.ID + LOG.info("INPUT FILE %i / %s", n, page_id) + pcgts = page_from_file(self.workspace.download_file(input_file)) + self.add_metadata(pcgts) + page = pcgts.get_Page() + + if oplevel == 'page': + LOG.info("Binarizing on 'page' level in page '%s'", page_id) + page_image, page_xywh, _ = self.workspace.image_from_page(page, page_id) + binarizer = SbbBinarizer(image=page_image, model=model_path, patches=use_patches, save=None) + bin_image = binarizer.run() + # update METS (add the image file): + bin_image_path = self.workspace.save_image_file(bin_image, + file_id + '.IMG-BIN', + page_id=page_id, + file_grp=self.output_file_grp) + page.add_AlternativeImage(filename=bin_image_path, comment="binarized") + else: + raise NotImplementedError("Binarization below page level not implemented yet") + + file_id = make_file_id(input_file, self.output_file_grp) + pcgts.set_pcGtsId(file_id) + self.workspace.add_file( + ID=file_id, + file_grp=self.output_file_grp, + pageId=input_file.pageId, + mimetype=MIMETYPE_PAGE, + local_filename=os.path.join(self.output_file_grp, file_id + '.xml'), + content=to_xml(pcgts)) From ee26ebd7d8a4bf359f303c49fe68a69b91a26721 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Oct 2020 16:55:17 +0200 Subject: [PATCH 08/27] implement region/line binarization --- sbb_binarize/ocrd-tool.json | 2 +- sbb_binarize/ocrd_cli.py | 66 +++++++++++++++++++++++++++++-------- 2 files changed, 54 insertions(+), 14 deletions(-) 
diff --git a/sbb_binarize/ocrd-tool.json b/sbb_binarize/ocrd-tool.json index dafc09a..e0c4795 100644 --- a/sbb_binarize/ocrd-tool.json +++ b/sbb_binarize/ocrd-tool.json @@ -14,7 +14,7 @@ "type": "string", "enum": ["page", "region", "line"], "default": "page", - "description": "PAGE XML hierarchy level to operate on (currently only page supported" + "description": "PAGE XML hierarchy level to operate on" }, "patches": { "description": "by setting this parameter to true you let the model to see the image in patches.", diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index 7cb8022..187269f 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -1,3 +1,5 @@ +# TODO: AlternativeImage 'binarized' comment should be additive + import os.path from pkg_resources import resource_string from json import loads @@ -9,13 +11,7 @@ from ocrd_utils import ( MIMETYPE_PAGE ) from ocrd_modelfactory import page_from_file -from ocrd_models.ocrd_page import ( - MetadataItemType, - LabelsType, LabelType, - AlternativeImageType, - TextRegionType, - to_xml -) +from ocrd_models.ocrd_page import AlternativeImageType, to_xml from ocrd import Processor from .sbb_binarize import SbbBinarizer @@ -48,24 +44,68 @@ class SbbBinarizeProcessor(Processor): LOG.info("INPUT FILE %i / %s", n, page_id) pcgts = page_from_file(self.workspace.download_file(input_file)) self.add_metadata(pcgts) + pcgts.set_pcGtsId(file_id) page = pcgts.get_Page() if oplevel == 'page': LOG.info("Binarizing on 'page' level in page '%s'", page_id) page_image, page_xywh, _ = self.workspace.image_from_page(page, page_id) - binarizer = SbbBinarizer(image=page_image, model=model_path, patches=use_patches, save=None) - bin_image = binarizer.run() + bin_image = SbbBinarizer( + image=page_image, + model=model_path, + patches=use_patches, + save=None + ).run() # update METS (add the image file): bin_image_path = self.workspace.save_image_file(bin_image, file_id + '.IMG-BIN', page_id=page_id, 
file_grp=self.output_file_grp) - page.add_AlternativeImage(filename=bin_image_path, comment="binarized") + page.add_AlternativeImage(AlternativeImageType(filename=bin_image_path, comment="binarized")) + else: - raise NotImplementedError("Binarization below page level not implemented yet") + regions = page.get_AllRegions(['Text', 'Table']) + if not regions: + LOG.warning("Page '%s' contains no text/table regions", page_id) + + for region in regions: + region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh) + + if oplevel == 'region': + region_image_bin = SbbBinarizer( + image=region_image, + model=model_path, + patches=use_patches, + save=None + ).run() + region_image_bin_path = self.workspace.save_image_file( + region_image_bin, + "%s_%s.IMG-BIN" % (file_id, region.id), + page_id=page_id, + file_grp=self.output_file_grp) + region.add_AlternativeImage( + AlternativeImageType(filename=region_image_bin_path, comments='binarized')) + + elif oplevel == 'line': + lines = region.get_TextLine() + if not lines: + LOG.warning("Page '%s' region '%s' contains no text lines", page_id, region.id) + for line in lines: + line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh) + line_image_bin = SbbBinarizer( + image=line_image, + model=model_path, + patches=use_patches, + save=None + ).run() + line_image_bin_path = self.workspace.save_image_file( + line_image_bin, + "%s_%s_%s.IMG-BIN" % (file_id, region.id, line.id), + page_id=page_id, + file_grp=self.output_file_grp) + line.add_AlternativeImage( + AlternativeImageType(filename=line_image_bin_path, comments='binarized')) - file_id = make_file_id(input_file, self.output_file_grp) - pcgts.set_pcGtsId(file_id) self.workspace.add_file( ID=file_id, file_grp=self.output_file_grp, From 6913415f55c7d5f0126ccda1ebc77ffe1c337a5d Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Oct 2020 17:06:08 +0200 Subject: [PATCH 09/27] require ocrd --- requirements.txt | 
4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 39fbd11..d6a9388 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ +numpy >= 1.17.0, < 1.19.0 +setuptools >= 41 opencv-python-headless -numpy +ocrd >= 2.18.0 keras >= 2.3.1, < 2.4 tensorflow >= 1.15, < 1.16 From d4c97ebf7260eadf0e735255c5d86cd33ac61114 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Oct 2020 17:12:17 +0200 Subject: [PATCH 10/27] add OCR-D click interface --- sbb_binarize/ocrd_cli.py | 9 ++++++++- setup.py | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index 187269f..f4eac2c 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -4,15 +4,17 @@ import os.path from pkg_resources import resource_string from json import loads +from click import command from ocrd_utils import ( getLogger, assert_file_grp_cardinality, make_file_id, MIMETYPE_PAGE ) +from ocrd import Processor from ocrd_modelfactory import page_from_file from ocrd_models.ocrd_page import AlternativeImageType, to_xml -from ocrd import Processor +from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor from .sbb_binarize import SbbBinarizer @@ -113,3 +115,8 @@ class SbbBinarizeProcessor(Processor): mimetype=MIMETYPE_PAGE, local_filename=os.path.join(self.output_file_grp, file_id + '.xml'), content=to_xml(pcgts)) + +@command() +@ocrd_cli_options +def cli(*args, **kwargs): + return ocrd_cli_wrap_processor(SbbBinarizeProcessor, *args, **kwargs) diff --git a/setup.py b/setup.py index 980dd2f..156f50d 100644 --- a/setup.py +++ b/setup.py @@ -23,6 +23,7 @@ setup( entry_points={ 'console_scripts': [ 'sbb_binarize=sbb_binarize.cli:main', + 'ocrd-sbb-binarize=sbb_binarize.ocrd_cli:cli', ] }, ) From a4c0cf0a475dbe8934fab3ae699512c435029e2d Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Oct 2020 17:26:13 +0200 Subject: [PATCH 11/27] shut up 
keras/tensorflow --- sbb_binarize/sbb_binarize.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py index 70a81cf..bb58f36 100644 --- a/sbb_binarize/sbb_binarize.py +++ b/sbb_binarize/sbb_binarize.py @@ -2,19 +2,20 @@ Tool to load model and binarize a given image. """ -from os import listdir +import sys +from os import listdir, environ, devnull from os.path import join from warnings import catch_warnings, simplefilter import numpy as np import cv2 +environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +stderr = sys.stderr +sys.stderr = open(devnull, 'w') from keras.models import load_model +sys.stderr = stderr import tensorflow as tf -# XXX better to set env var before tensorflow import to suppress those specific warnings -with catch_warnings(): - simplefilter("ignore") - def resize_image(img_in, input_height, input_width): return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) From fabb63834a6bfdff7ce711b4b6fd992a72f66ccb Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Oct 2020 17:31:47 +0200 Subject: [PATCH 12/27] add assets subrepo --- .gitmodules | 3 +++ repo/assets | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 repo/assets diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..5b24fbb --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "repo/assets"] + path = repo/assets + url = https://github.com/OCR-D/assets diff --git a/repo/assets b/repo/assets new file mode 160000 index 0000000..32fde9e --- /dev/null +++ b/repo/assets @@ -0,0 +1 @@ +Subproject commit 32fde9eb242c595a1986a193090c689f52eeb734 From 12b44af32943d233c1a7c6af7c13a414cb4e3428 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Oct 2020 17:53:37 +0200 Subject: [PATCH 13/27] convert between cv2 and pil, DRY binarizer call --- sbb_binarize/ocrd_cli.py | 47 +++++++++++++++++++++--------------- 
sbb_binarize/sbb_binarize.py | 8 +++--- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index f4eac2c..0b8223e 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -4,7 +4,11 @@ import os.path from pkg_resources import resource_string from json import loads +from PIL import Image +import numpy as np +import cv2 from click import command + from ocrd_utils import ( getLogger, assert_file_grp_cardinality, @@ -21,6 +25,16 @@ from .sbb_binarize import SbbBinarizer OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8')) TOOL = 'ocrd-sbb-binarize' +def cv2pil(img): + color_coverted = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB) + return Image.fromarray(color_coverted) + +def pil2cv(img): + # from ocrd/workspace.py + color_conversion = cv2.COLOR_GRAY2BGR if img.mode in ('1', 'L') else cv2.COLOR_RGB2BGR + pil_as_np_array = np.array(img).astype('uint8') if img.mode == '1' else np.array(img) + return cv2.cvtColor(pil_as_np_array, color_conversion) + class SbbBinarizeProcessor(Processor): def __init__(self, *args, **kwargs): @@ -28,6 +42,14 @@ class SbbBinarizeProcessor(Processor): kwargs['version'] = OCRD_TOOL['version'] super().__init__(*args, **kwargs) + def _run_binarizer(self, img): + return cv2pil( + SbbBinarizer( + image=pil2cv(img), + model=self.model_path, + patches=self.use_patches, + save=None).run()) + def process(self): """ Binarize with sbb_binarization @@ -37,8 +59,8 @@ class SbbBinarizeProcessor(Processor): assert_file_grp_cardinality(self.output_file_grp, 1) oplevel = self.parameter['operation_level'] - use_patches = self.parameter['patches'] - model_path = self.parameter['model'] + self.use_patches = self.parameter['patches'] # pylint: disable=attribute-defined-outside-init + self.model_path = self.parameter['model'] # pylint: disable=attribute-defined-outside-init for n, input_file in enumerate(self.input_files): file_id = make_file_id(input_file, 
self.output_file_grp) @@ -52,12 +74,7 @@ class SbbBinarizeProcessor(Processor): if oplevel == 'page': LOG.info("Binarizing on 'page' level in page '%s'", page_id) page_image, page_xywh, _ = self.workspace.image_from_page(page, page_id) - bin_image = SbbBinarizer( - image=page_image, - model=model_path, - patches=use_patches, - save=None - ).run() + bin_image = self._run_binarizer(page_image) # update METS (add the image file): bin_image_path = self.workspace.save_image_file(bin_image, file_id + '.IMG-BIN', @@ -74,12 +91,7 @@ class SbbBinarizeProcessor(Processor): region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh) if oplevel == 'region': - region_image_bin = SbbBinarizer( - image=region_image, - model=model_path, - patches=use_patches, - save=None - ).run() + region_image_bin = self._run_binarizer(region_image) region_image_bin_path = self.workspace.save_image_file( region_image_bin, "%s_%s.IMG-BIN" % (file_id, region.id), @@ -94,12 +106,7 @@ class SbbBinarizeProcessor(Processor): LOG.warning("Page '%s' region '%s' contains no text lines", page_id, region.id) for line in lines: line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh) - line_image_bin = SbbBinarizer( - image=line_image, - model=model_path, - patches=use_patches, - save=None - ).run() + line_image_bin = self._run_binarizer(line_image) line_image_bin_path = self.workspace.save_image_file( line_image_bin, "%s_%s_%s.IMG-BIN" % (file_id, region.id, line.id), diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py index bb58f36..9769456 100644 --- a/sbb_binarize/sbb_binarize.py +++ b/sbb_binarize/sbb_binarize.py @@ -8,6 +8,7 @@ from os.path import join from warnings import catch_warnings, simplefilter import numpy as np +from PIL import Image import cv2 environ['TF_CPP_MIN_LOG_LEVEL'] = '3' stderr = sys.stderr @@ -23,9 +24,10 @@ class SbbBinarizer: # TODO use True/False for patches def __init__(self, model, image=None, 
image_path=None, patches='false', save=None): - if not(image or image_path) or (image and image_path): - raise ValueError("Must pass either a PIL image or an image_path") - if image: + if (image is not None and image_path is not None) or \ + (image is None and image_path is None): + raise ValueError("Must pass either a opencv2 image or an image_path") + if image is not None: self.image = image else: self.image = cv2.imread(self.image) From 0650bad533623b3b5b77983eb9223eafe21d4155 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 16 Oct 2020 10:59:42 +0200 Subject: [PATCH 14/27] image_from_segment: exclude already binarized images Co-authored-by: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> --- sbb_binarize/ocrd_cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index 0b8223e..ba1004f 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -73,7 +73,7 @@ class SbbBinarizeProcessor(Processor): if oplevel == 'page': LOG.info("Binarizing on 'page' level in page '%s'", page_id) - page_image, page_xywh, _ = self.workspace.image_from_page(page, page_id) + page_image, page_xywh, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized') bin_image = self._run_binarizer(page_image) # update METS (add the image file): bin_image_path = self.workspace.save_image_file(bin_image, @@ -88,7 +88,7 @@ class SbbBinarizeProcessor(Processor): LOG.warning("Page '%s' contains no text/table regions", page_id) for region in regions: - region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh) + region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized') if oplevel == 'region': region_image_bin = self._run_binarizer(region_image) @@ -105,7 +105,7 @@ class SbbBinarizeProcessor(Processor): if not lines: LOG.warning("Page '%s' region '%s' contains no text 
lines", page_id, region.id) for line in lines: - line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh) + line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') line_image_bin = self._run_binarizer(line_image) line_image_bin_path = self.workspace.save_image_file( line_image_bin, From ee82d188bf133054ebd4e4501bef3d5b3b49aeb7 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 16 Oct 2020 11:00:18 +0200 Subject: [PATCH 15/27] Set page_id from input file Co-authored-by: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> --- sbb_binarize/ocrd_cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index ba1004f..7500bac 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -78,7 +78,7 @@ class SbbBinarizeProcessor(Processor): # update METS (add the image file): bin_image_path = self.workspace.save_image_file(bin_image, file_id + '.IMG-BIN', - page_id=page_id, + page_id=input_file.pageId, file_grp=self.output_file_grp) page.add_AlternativeImage(AlternativeImageType(filename=bin_image_path, comment="binarized")) @@ -95,7 +95,7 @@ class SbbBinarizeProcessor(Processor): region_image_bin_path = self.workspace.save_image_file( region_image_bin, "%s_%s.IMG-BIN" % (file_id, region.id), - page_id=page_id, + page_id=input_file.pageId, file_grp=self.output_file_grp) region.add_AlternativeImage( AlternativeImageType(filename=region_image_bin_path, comments='binarized')) @@ -110,7 +110,7 @@ class SbbBinarizeProcessor(Processor): line_image_bin_path = self.workspace.save_image_file( line_image_bin, "%s_%s_%s.IMG-BIN" % (file_id, region.id, line.id), - page_id=page_id, + page_id=input_file.pageId, file_grp=self.output_file_grp) line.add_AlternativeImage( AlternativeImageType(filename=line_image_bin_path, comments='binarized')) From 2eec6876fa45f01cbdcaadb248e8bf3418bb4265 Mon Sep 17 
00:00:00 2001 From: Konstantin Baierer Date: Fri, 16 Oct 2020 11:01:11 +0200 Subject: [PATCH 16/27] Append "binarized" to AlternativeImage/comments Co-authored-by: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> --- sbb_binarize/ocrd_cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index 7500bac..398fa6c 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -80,7 +80,7 @@ class SbbBinarizeProcessor(Processor): file_id + '.IMG-BIN', page_id=input_file.pageId, file_grp=self.output_file_grp) - page.add_AlternativeImage(AlternativeImageType(filename=bin_image_path, comment="binarized")) + page.add_AlternativeImage(AlternativeImageType(filename=bin_image_path, comment=page_xywh['features']+",binarized")) else: regions = page.get_AllRegions(['Text', 'Table']) @@ -98,7 +98,7 @@ class SbbBinarizeProcessor(Processor): page_id=input_file.pageId, file_grp=self.output_file_grp) region.add_AlternativeImage( - AlternativeImageType(filename=region_image_bin_path, comments='binarized')) + AlternativeImageType(filename=region_image_bin_path, comments=region_xywh['features']+',binarized')) elif oplevel == 'line': lines = region.get_TextLine() @@ -113,7 +113,7 @@ class SbbBinarizeProcessor(Processor): page_id=input_file.pageId, file_grp=self.output_file_grp) line.add_AlternativeImage( - AlternativeImageType(filename=line_image_bin_path, comments='binarized')) + AlternativeImageType(filename=line_image_bin_path, comments=line_xywh['features']+',binarized')) self.workspace.add_file( ID=file_id, From eece89b9acd40808d98da98093a942518ff4dc5c Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 16 Oct 2020 11:12:17 +0200 Subject: [PATCH 17/27] patches should be true by default, ht @vahidrezanezhad --- sbb_binarize/ocrd-tool.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbb_binarize/ocrd-tool.json b/sbb_binarize/ocrd-tool.json index 
e0c4795..e7ca0c8 100644 --- a/sbb_binarize/ocrd-tool.json +++ b/sbb_binarize/ocrd-tool.json @@ -19,7 +19,7 @@ "patches": { "description": "by setting this parameter to true you let the model to see the image in patches.", "type": "boolean", - "default": false + "default": true }, "model": { "description": "models directory.", From b7a630194862daf8d063a7d0c2ecf710dc287385 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 16 Oct 2020 11:27:37 +0200 Subject: [PATCH 18/27] fix cv2pil ht @bertsky --- sbb_binarize/ocrd_cli.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index 398fa6c..d755b53 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -1,5 +1,3 @@ -# TODO: AlternativeImage 'binarized' comment should be additive - import os.path from pkg_resources import resource_string from json import loads @@ -26,8 +24,7 @@ OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8')) TOOL = 'ocrd-sbb-binarize' def cv2pil(img): - color_coverted = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB) - return Image.fromarray(color_coverted) + return Image.fromarray(img.as_type('uint8')) def pil2cv(img): # from ocrd/workspace.py @@ -80,7 +77,7 @@ class SbbBinarizeProcessor(Processor): file_id + '.IMG-BIN', page_id=input_file.pageId, file_grp=self.output_file_grp) - page.add_AlternativeImage(AlternativeImageType(filename=bin_image_path, comment=page_xywh['features']+",binarized")) + page.add_AlternativeImage(AlternativeImageType(filename=bin_image_path, comment='%s,binarized' % page_xywh['features'])) else: regions = page.get_AllRegions(['Text', 'Table']) @@ -98,7 +95,7 @@ class SbbBinarizeProcessor(Processor): page_id=input_file.pageId, file_grp=self.output_file_grp) region.add_AlternativeImage( - AlternativeImageType(filename=region_image_bin_path, comments=region_xywh['features']+',binarized')) + AlternativeImageType(filename=region_image_bin_path, 
comments='%s,binarized' % region_xywh['features'])) elif oplevel == 'line': lines = region.get_TextLine() @@ -113,7 +110,7 @@ class SbbBinarizeProcessor(Processor): page_id=input_file.pageId, file_grp=self.output_file_grp) line.add_AlternativeImage( - AlternativeImageType(filename=line_image_bin_path, comments=line_xywh['features']+',binarized')) + AlternativeImageType(filename=line_image_bin_path, comments='%s,binarized' % line_xywh['features'])) self.workspace.add_file( ID=file_id, From e5bc5572a22f47c0b797ea144237f7d72ae1fc85 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 16 Oct 2020 11:31:14 +0200 Subject: [PATCH 19/27] change description ht @bertsky @vahidrezanezhad --- sbb_binarize/ocrd-tool.json | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sbb_binarize/ocrd-tool.json b/sbb_binarize/ocrd-tool.json index e7ca0c8..5bdf10f 100644 --- a/sbb_binarize/ocrd-tool.json +++ b/sbb_binarize/ocrd-tool.json @@ -4,7 +4,7 @@ "tools": { "ocrd-sbb-binarize": { "executable": "ocrd-sbb-binarize", - "description": "Smart binarization with sbb_binarization", + "description": "Pixelwise binarization with selectional auto-encoders in Keras", "categories": ["Image preprocessing"], "steps": ["preprocessing/optimization/binarization"], "input_file_grp": [], diff --git a/setup.py b/setup.py index 156f50d..2ad6418 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ install_requires = open('requirements.txt').read().split('\n') setup( name='sbb_binarization', version=version, - description='Binarization with ', + description='Pixelwise binarization with selectional auto-encoders in Keras', long_description=open('README.md').read(), long_description_content_type='text/markdown', author='Vahid Rezanezhad', From a1c8f6f4650591eda9cda8af5cb17237b6b2deb0 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 16 Oct 2020 11:53:26 +0200 Subject: [PATCH 20/27] line-level binarization independent of region-level --- 
sbb_binarize/ocrd_cli.py | 53 +++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index d755b53..d846212 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -79,38 +79,35 @@ class SbbBinarizeProcessor(Processor): file_grp=self.output_file_grp) page.add_AlternativeImage(AlternativeImageType(filename=bin_image_path, comment='%s,binarized' % page_xywh['features'])) - else: - regions = page.get_AllRegions(['Text', 'Table']) + elif oplevel == 'region': + regions = page.get_AllRegions(['Text', 'Table'], depth=1) if not regions: LOG.warning("Page '%s' contains no text/table regions", page_id) - for region in regions: region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized') - - if oplevel == 'region': - region_image_bin = self._run_binarizer(region_image) - region_image_bin_path = self.workspace.save_image_file( - region_image_bin, - "%s_%s.IMG-BIN" % (file_id, region.id), - page_id=input_file.pageId, - file_grp=self.output_file_grp) - region.add_AlternativeImage( - AlternativeImageType(filename=region_image_bin_path, comments='%s,binarized' % region_xywh['features'])) - - elif oplevel == 'line': - lines = region.get_TextLine() - if not lines: - LOG.warning("Page '%s' region '%s' contains no text lines", page_id, region.id) - for line in lines: - line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') - line_image_bin = self._run_binarizer(line_image) - line_image_bin_path = self.workspace.save_image_file( - line_image_bin, - "%s_%s_%s.IMG-BIN" % (file_id, region.id, line.id), - page_id=input_file.pageId, - file_grp=self.output_file_grp) - line.add_AlternativeImage( - AlternativeImageType(filename=line_image_bin_path, comments='%s,binarized' % line_xywh['features'])) + region_image_bin = self._run_binarizer(region_image) + 
region_image_bin_path = self.workspace.save_image_file( + region_image_bin, + "%s_%s.IMG-BIN" % (file_id, region.id), + page_id=input_file.pageId, + file_grp=self.output_file_grp) + region.add_AlternativeImage( + AlternativeImageType(filename=region_image_bin_path, comments='%s,binarized' % region_xywh['features'])) + + elif oplevel == 'line': + region_line_tuples = [(r.id, r.get_TextLine()) for r in page.get_AllRegions(['Text'], depth=0)] + if not region_line_tuples: + LOG.warning("Page '%s' contains no text lines", page_id) + for region_id, line in region_line_tuples: + line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') + line_image_bin = self._run_binarizer(line_image) + line_image_bin_path = self.workspace.save_image_file( + line_image_bin, + "%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id), + page_id=input_file.pageId, + file_grp=self.output_file_grp) + line.add_AlternativeImage( + AlternativeImageType(filename=line_image_bin_path, comments='%s,binarized' % line_xywh['features'])) self.workspace.add_file( ID=file_id, From 1fa581283c609289041dbad24e8aa0486276f81a Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 16 Oct 2020 12:21:44 +0200 Subject: [PATCH 21/27] SbbBinarizer: refactor (variable names, less instance-wide state) --- sbb_binarize/cli.py | 9 +--- sbb_binarize/ocrd_cli.py | 19 +++---- sbb_binarize/sbb_binarize.py | 96 ++++++++++++++++-------------------- 3 files changed, 51 insertions(+), 73 deletions(-) diff --git a/sbb_binarize/cli.py b/sbb_binarize/cli.py index 20881b5..1b3bc7e 100644 --- a/sbb_binarize/cli.py +++ b/sbb_binarize/cli.py @@ -16,13 +16,8 @@ def main(): options = parser.parse_args() - binarizer = SbbBinarizer( - image_path=options.image, - model=options.model, - patches=options.patches, - save=options.save - ) - binarizer.run() + binarizer = SbbBinarizer(model_dir=options.model) + binarizer.run(image_path=options.image, patches=options.patches, 
save=options.save) if __name__ == "__main__": main() diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index d846212..854586b 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -39,14 +39,6 @@ class SbbBinarizeProcessor(Processor): kwargs['version'] = OCRD_TOOL['version'] super().__init__(*args, **kwargs) - def _run_binarizer(self, img): - return cv2pil( - SbbBinarizer( - image=pil2cv(img), - model=self.model_path, - patches=self.use_patches, - save=None).run()) - def process(self): """ Binarize with sbb_binarization @@ -56,8 +48,9 @@ class SbbBinarizeProcessor(Processor): assert_file_grp_cardinality(self.output_file_grp, 1) oplevel = self.parameter['operation_level'] - self.use_patches = self.parameter['patches'] # pylint: disable=attribute-defined-outside-init - self.model_path = self.parameter['model'] # pylint: disable=attribute-defined-outside-init + use_patches = self.parameter['patches'] # pylint: disable=attribute-defined-outside-init + model_path = self.parameter['model'] # pylint: disable=attribute-defined-outside-init + binarizer = SbbBinarizer(model_dir=self.model_path) for n, input_file in enumerate(self.input_files): file_id = make_file_id(input_file, self.output_file_grp) @@ -71,7 +64,7 @@ class SbbBinarizeProcessor(Processor): if oplevel == 'page': LOG.info("Binarizing on 'page' level in page '%s'", page_id) page_image, page_xywh, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized') - bin_image = self._run_binarizer(page_image) + bin_image = cv2pil(binarizer.run(image=pil2cv(page_image), patches=use_patches)) # update METS (add the image file): bin_image_path = self.workspace.save_image_file(bin_image, file_id + '.IMG-BIN', @@ -85,7 +78,7 @@ class SbbBinarizeProcessor(Processor): LOG.warning("Page '%s' contains no text/table regions", page_id) for region in regions: region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized') - 
region_image_bin = self._run_binarizer(region_image) + region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), patches=use_patches)) region_image_bin_path = self.workspace.save_image_file( region_image_bin, "%s_%s.IMG-BIN" % (file_id, region.id), @@ -100,7 +93,7 @@ class SbbBinarizeProcessor(Processor): LOG.warning("Page '%s' contains no text lines", page_id) for region_id, line in region_line_tuples: line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') - line_image_bin = self._run_binarizer(line_image) + line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), patches=use_patches)) line_image_bin_path = self.workspace.save_image_file( line_image_bin, "%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id), diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py index 9769456..51bccae 100644 --- a/sbb_binarize/sbb_binarize.py +++ b/sbb_binarize/sbb_binarize.py @@ -22,50 +22,35 @@ def resize_image(img_in, input_height, input_width): class SbbBinarizer: - # TODO use True/False for patches - def __init__(self, model, image=None, image_path=None, patches='false', save=None): - if (image is not None and image_path is not None) or \ - (image is None and image_path is None): - raise ValueError("Must pass either a opencv2 image or an image_path") - if image is not None: - self.image = image - else: - self.image = cv2.imread(self.image) - self.patches = patches - self.save = save - self.model_dir = model + def __init__(self, model_dir): + self.model_dir = model_dir - def start_new_session_and_model(self): + def start_new_session(self): config = tf.ConfigProto() config.gpu_options.allow_growth = True self.session = tf.Session(config=config) # tf.InteractiveSession() - def load_model(self, model_name): - - self.model = load_model(join(self.model_dir, model_name), compile=False) - - self.img_height = self.model.layers[len(self.model.layers)-1].output_shape[1] - self.img_width = 
self.model.layers[len(self.model.layers)-1].output_shape[2] - self.n_classes = self.model.layers[len(self.model.layers)-1].output_shape[3] - def end_session(self): self.session.close() - del self.model del self.session - def predict(self,model_name): - self.load_model(model_name) - img = self.image - img_width_model = self.img_width - img_height_model = self.img_height + def load_model(self, model_name): + model = load_model(join(self.model_dir, model_name), compile=False) + model_height = model.layers[len(model.layers)-1].output_shape[1] + model_width = model.layers[len(model.layers)-1].output_shape[2] + n_classes = model.layers[len(model.layers)-1].output_shape[3] + return model, model_height, model_width, n_classes + + def predict(self, model_name, img, patches): + model, model_height, model_width, n_classes = self.load_model(model_name) - if self.patches in ('true', 'True'): + if patches in ('true', 'True'): - margin = int(0.1 * img_width_model) + margin = int(0.1 * model_width) - width_mid = img_width_model - 2 * margin - height_mid = img_height_model - 2 * margin + width_mid = model_width - 2 * margin + height_mid = model_height - 2 * margin img = img / float(255.0) @@ -93,28 +78,28 @@ class SbbBinarizer: if i == 0: index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model + index_x_u = index_x_d + model_width elif i > 0: index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model + index_x_u = index_x_d + model_width if j == 0: index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model + index_y_u = index_y_d + model_height elif j > 0: index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model + index_y_u = index_y_d + model_height if index_x_u > img_w: index_x_u = img_w - index_x_d = img_w - img_width_model + index_x_d = img_w - model_width if index_y_u > img_h: index_y_u = img_h - index_y_d = img_h - img_height_model + index_y_d = img_h - model_height img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - 
label_p_pred = self.model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) + label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) seg = np.argmax(label_p_pred, axis=3)[0] @@ -189,10 +174,9 @@ class SbbBinarizer: img_h_page = img.shape[0] img_w_page = img.shape[1] img = img / float(255.0) - img = resize_image(img, img_height_model, img_width_model) + img = resize_image(img, model_height, model_width) - label_p_pred = self.model.predict( - img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) + label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) seg = np.argmax(label_p_pred, axis=3)[0] seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) @@ -200,29 +184,35 @@ class SbbBinarizer: prediction_true = prediction_true.astype(np.uint8) return prediction_true[:,:,0] - def run(self): - self.start_new_session_and_model() - models_n = listdir(self.model_dir) + # TODO use True/False for patches + def run(self, image=None, image_path=None, save=None, patches='false'): + if (image is not None and image_path is not None) or \ + (image is None and image_path is None): + raise ValueError("Must pass either a opencv2 image or an image_path") + if image_path is not None: + image = cv2.imread(image) + self.start_new_session() + list_of_model_files = listdir(self.model_dir) img_last = 0 - for model_in in models_n: + for model_in in list_of_model_files: - res = self.predict(model_in) + res = self.predict(model_in, image, patches) img_fin = np.zeros((res.shape[0], res.shape[1], 3)) res[:, :][res[:, :] == 0] = 2 - res = res-1 - res = res*255 + res = res - 1 + res = res * 255 img_fin[:, :, 0] = res img_fin[:, :, 1] = res img_fin[:, :, 2] = res img_fin = img_fin.astype(np.uint8) - img_fin = (res[:, :] == 0)*255 - img_last = img_last+img_fin + img_fin = (res[:, :] == 0) * 255 + img_last = img_last + img_fin kernel = np.ones((5, 5), np.uint8) img_last[:, 
:][img_last[:, :] > 0] = 255 - img_last = (img_last[:, :] == 0)*255 - if self.save: - cv2.imwrite(self.save, img_last) + img_last = (img_last[:, :] == 0) * 255 + if save: + cv2.imwrite(save, img_last) return img_last From 645ec4124a539827e0a982a0c2aea55aa3f37985 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 16 Oct 2020 12:35:58 +0200 Subject: [PATCH 22/27] ocrd-tool: disable "line" level-of-operation for now --- sbb_binarize/ocrd-tool.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbb_binarize/ocrd-tool.json b/sbb_binarize/ocrd-tool.json index 5bdf10f..8a1b1b9 100644 --- a/sbb_binarize/ocrd-tool.json +++ b/sbb_binarize/ocrd-tool.json @@ -12,7 +12,7 @@ "parameters": { "operation_level": { "type": "string", - "enum": ["page", "region", "line"], + "enum": ["page", "region"], "default": "page", "description": "PAGE XML hierarchy level to operate on" }, From fad7b7aff1354e679d88b97c334e1d93e8c51a77 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 16 Oct 2020 12:48:32 +0200 Subject: [PATCH 23/27] rewrite "normal" CLI with click --- sbb_binarize/cli.py | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/sbb_binarize/cli.py b/sbb_binarize/cli.py index 1b3bc7e..1ae1aa4 100644 --- a/sbb_binarize/cli.py +++ b/sbb_binarize/cli.py @@ -2,22 +2,15 @@ sbb_binarize CLI """ -from argparse import ArgumentParser +from click import command, option, argument, version_option from .sbb_binarize import SbbBinarizer -def main(): - parser = ArgumentParser() - - parser.add_argument('-i', '--image', default=None, help='image.') - parser.add_argument('-p', '--patches', default=False, help='by setting this parameter to true you let the model to see the image in patches.') - parser.add_argument('-s', '--save', default=False, help='save prediction with a given name here. 
The name and format should be given (outputname.tif).') - parser.add_argument('-m', '--model', default=None, help='models directory.') - - options = parser.parse_args() - - binarizer = SbbBinarizer(model_dir=options.model) - binarizer.run(image_path=options.image, patches=options.patches, save=options.save) - -if __name__ == "__main__": - main() +@command() +@version_option() +@option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.') +@option('--model-dir', '-m', required=True, help='directory containing models for prediction') +@argument('input_image') +@argument('output_image') +def main(patches, model_dir, input_image, output_image): + SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image) From 2bc6ccc4c0b67042964a3876c549538f2ee08489 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 16 Oct 2020 12:50:32 +0200 Subject: [PATCH 24/27] replace patches string comparison with use_patches boolean --- sbb_binarize/ocrd_cli.py | 8 ++++---- sbb_binarize/sbb_binarize.py | 9 ++++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index 854586b..14b9b9c 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -50,7 +50,7 @@ class SbbBinarizeProcessor(Processor): oplevel = self.parameter['operation_level'] use_patches = self.parameter['patches'] # pylint: disable=attribute-defined-outside-init model_path = self.parameter['model'] # pylint: disable=attribute-defined-outside-init - binarizer = SbbBinarizer(model_dir=self.model_path) + binarizer = SbbBinarizer(model_dir=model_path) for n, input_file in enumerate(self.input_files): file_id = make_file_id(input_file, self.output_file_grp) @@ -64,7 +64,7 @@ class SbbBinarizeProcessor(Processor): if oplevel == 'page': LOG.info("Binarizing on 'page' level in page '%s'", page_id) page_image, page_xywh, _ = 
self.workspace.image_from_page(page, page_id, feature_filter='binarized') - bin_image = cv2pil(binarizer.run(image=pil2cv(page_image), patches=use_patches)) + bin_image = cv2pil(binarizer.run(image=pil2cv(page_image), use_patches=use_patches)) # update METS (add the image file): bin_image_path = self.workspace.save_image_file(bin_image, file_id + '.IMG-BIN', @@ -78,7 +78,7 @@ class SbbBinarizeProcessor(Processor): LOG.warning("Page '%s' contains no text/table regions", page_id) for region in regions: region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized') - region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), patches=use_patches)) + region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), use_patches=use_patches)) region_image_bin_path = self.workspace.save_image_file( region_image_bin, "%s_%s.IMG-BIN" % (file_id, region.id), @@ -93,7 +93,7 @@ class SbbBinarizeProcessor(Processor): LOG.warning("Page '%s' contains no text lines", page_id) for region_id, line in region_line_tuples: line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') - line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), patches=use_patches)) + line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), use_patches=use_patches)) line_image_bin_path = self.workspace.save_image_file( line_image_bin, "%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id), diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py index 51bccae..a664d6d 100644 --- a/sbb_binarize/sbb_binarize.py +++ b/sbb_binarize/sbb_binarize.py @@ -42,10 +42,10 @@ class SbbBinarizer: n_classes = model.layers[len(model.layers)-1].output_shape[3] return model, model_height, model_width, n_classes - def predict(self, model_name, img, patches): + def predict(self, model_name, img, use_patches): model, model_height, model_width, n_classes = 
self.load_model(model_name) - if patches in ('true', 'True'): + if use_patches: margin = int(0.1 * model_width) @@ -184,8 +184,7 @@ class SbbBinarizer: prediction_true = prediction_true.astype(np.uint8) return prediction_true[:,:,0] - # TODO use True/False for patches - def run(self, image=None, image_path=None, save=None, patches='false'): + def run(self, image=None, image_path=None, save=None, use_patches=False): if (image is not None and image_path is not None) or \ (image is None and image_path is None): raise ValueError("Must pass either a opencv2 image or an image_path") @@ -196,7 +195,7 @@ class SbbBinarizer: img_last = 0 for model_in in list_of_model_files: - res = self.predict(model_in, image, patches) + res = self.predict(model_in, image, use_patches) img_fin = np.zeros((res.shape[0], res.shape[1], 3)) res[:, :][res[:, :] == 0] = 2 From 6543e74deb3ed1ec5bffcc8d4d089bfb7f901702 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 16 Oct 2020 12:52:16 +0200 Subject: [PATCH 25/27] remove "patches" parameter, always use_patches --- sbb_binarize/ocrd-tool.json | 5 ----- sbb_binarize/ocrd_cli.py | 7 +++---- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/sbb_binarize/ocrd-tool.json b/sbb_binarize/ocrd-tool.json index 8a1b1b9..3095eeb 100644 --- a/sbb_binarize/ocrd-tool.json +++ b/sbb_binarize/ocrd-tool.json @@ -16,11 +16,6 @@ "default": "page", "description": "PAGE XML hierarchy level to operate on" }, - "patches": { - "description": "by setting this parameter to true you let the model to see the image in patches.", - "type": "boolean", - "default": true - }, "model": { "description": "models directory.", "type": "string", diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index 14b9b9c..9868c20 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -48,7 +48,6 @@ class SbbBinarizeProcessor(Processor): assert_file_grp_cardinality(self.output_file_grp, 1) oplevel = self.parameter['operation_level'] - 
use_patches = self.parameter['patches'] # pylint: disable=attribute-defined-outside-init model_path = self.parameter['model'] # pylint: disable=attribute-defined-outside-init binarizer = SbbBinarizer(model_dir=model_path) @@ -64,7 +63,7 @@ class SbbBinarizeProcessor(Processor): if oplevel == 'page': LOG.info("Binarizing on 'page' level in page '%s'", page_id) page_image, page_xywh, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized') - bin_image = cv2pil(binarizer.run(image=pil2cv(page_image), use_patches=use_patches)) + bin_image = cv2pil(binarizer.run(image=pil2cv(page_image), use_patches=True)) # update METS (add the image file): bin_image_path = self.workspace.save_image_file(bin_image, file_id + '.IMG-BIN', @@ -78,7 +77,7 @@ class SbbBinarizeProcessor(Processor): LOG.warning("Page '%s' contains no text/table regions", page_id) for region in regions: region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized') - region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), use_patches=use_patches)) + region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), use_patches=True)) region_image_bin_path = self.workspace.save_image_file( region_image_bin, "%s_%s.IMG-BIN" % (file_id, region.id), @@ -93,7 +92,7 @@ class SbbBinarizeProcessor(Processor): LOG.warning("Page '%s' contains no text lines", page_id) for region_id, line in region_line_tuples: line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') - line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), use_patches=use_patches)) + line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), use_patches=True)) line_image_bin_path = self.workspace.save_image_file( line_image_bin, "%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id), From a24670edcc05527c95c49fa946e2e4b09993f9dd Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 16 Oct 
2020 13:31:28 +0200 Subject: [PATCH 26/27] minimal CI setup --- .circleci/config.yml | 47 ++++++++++++++++++++++++++++++++++++++++++++ Makefile | 34 +++++++++++++++++++++++++++++++- setup.py | 1 + 3 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..ca93957 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,47 @@ +version: 2 + +jobs: + + build-python36: + docker: + - image: python:3.6 + steps: + - checkout + - restore_cache: + keys: + - model-cache + - run: make model + - save_cache: + key: model-cache + paths: + models.tar.gz + models + - run: make install + - run: git submodule update --init + - run: make test + + build-python37: + docker: + - image: python:3.7 + steps: + - checkout + - restore_cache: + keys: + - model-cache + - run: make model + - save_cache: + key: model-cache + paths: + models.tar.gz + models + - run: make install + - run: git submodule update --init + - run: make test + +workflows: + version: 2 + build: + jobs: + - build-python36 + - build-python37 + #- build-python38 # no tensorflow for python 3.8 diff --git a/Makefile b/Makefile index f92ba15..95ddbfe 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,36 @@ -all: install +# Directory to store models +MODEL_DIR = $(PWD)/models +# BEGIN-EVAL makefile-parser --make-help Makefile + +help: + @echo "" + @echo " Targets" + @echo "" + @echo " install Install with pip" + @echo " model Downloads the pre-trained models from qurator-data.de" + @echo " test Run tests" + @echo "" + @echo " Variables" + @echo "" + @echo " MODEL_DIR Directory to store models" + +# END-EVAL + +# Install with pip install: pip install . 
+ +# Downloads the pre-trained models from qurator-data.de +model: $(MODEL_DIR)/model1_bin.h5 + +$(MODEL_DIR)/model1_bin.h5: models.tar.gz + tar xf models.tar.gz + +models.tar.gz: + wget 'https://qurator-data.de/sbb_binarization/models.tar.gz' + +# Run tests +test: model + cd repo/assets/data/kant_aufklaerung_1784/data; ocrd-sbb-binarize -I OCR-D-IMG -O BIN -P model $(MODEL_DIR) + cd repo/assets/data/kant_aufklaerung_1784-page-region/data; ocrd-sbb-binarize -I OCR-D-IMG -O BIN -P model $(MODEL_DIR) -P operation_level region diff --git a/setup.py b/setup.py index 2ad6418..7ab6e02 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ setup( license='Apache License 2.0', packages=find_packages(exclude=('tests', 'docs')), include_package_data=True, + package_data={'': ['*.json', '*.yml', '*.yaml']}, install_requires=install_requires, entry_points={ 'console_scripts': [ From 84d6b1f69337b52807b566f1d760007e68dc8467 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 16 Oct 2020 17:24:55 +0200 Subject: [PATCH 27/27] :bug: numpy.ndarray.as{_,}type --- sbb_binarize/ocrd_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index 9868c20..df4daef 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -24,7 +24,7 @@ OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8')) TOOL = 'ocrd-sbb-binarize' def cv2pil(img): - return Image.fromarray(img.as_type('uint8')) + return Image.fromarray(img.astype('uint8')) def pil2cv(img): # from ocrd/workspace.py