From 0c31eaf6b19e5a2a7548a8e38d3bb92658a9e4fc Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 22 Oct 2020 18:37:08 +0200 Subject: [PATCH 1/4] ocrd cli: "model" parameter replaceable by SBB_BINARIZE_DATA envvar --- sbb_binarize/ocrd-tool.json | 2 +- sbb_binarize/ocrd_cli.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sbb_binarize/ocrd-tool.json b/sbb_binarize/ocrd-tool.json index 3095eeb..ec5e39d 100644 --- a/sbb_binarize/ocrd-tool.json +++ b/sbb_binarize/ocrd-tool.json @@ -19,7 +19,7 @@ "model": { "description": "models directory.", "type": "string", - "required": true + "required": false } } } diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index df4daef..098da9c 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -37,6 +37,14 @@ class SbbBinarizeProcessor(Processor): def __init__(self, *args, **kwargs): kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL] kwargs['version'] = OCRD_TOOL['version'] + if not(kwargs.get('show_help', None) or kwargs.get('dump_json', None) or kwargs.get('show_version')): + if not 'parameter' in kwargs: + kwargs['parameter'] = {} + if not 'model' in kwargs['parameter']: + if 'SBB_BINARIZE_DATA' in os.environ: + kwargs['parameter']['model'] = os.environ['SBB_BINARIZE_DATA'] + else: + raise ValueError("Must pass 'model' parameter or set SBB_BINARIZE_DATA environment variable") super().__init__(*args, **kwargs) def process(self): From 664a441a2de9c388ed1083795902a06e4d5ca2f6 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 23 Oct 2020 11:30:31 +0200 Subject: [PATCH 2/4] glob *.h5 files only in model dir, fix #7 --- sbb_binarize/sbb_binarize.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py index a664d6d..8775836 100644 --- a/sbb_binarize/sbb_binarize.py +++ b/sbb_binarize/sbb_binarize.py @@ -3,7 +3,8 @@ Tool to load model and binarize a given image. """ import sys -from os import listdir, environ, devnull +from glob import glob +from os import environ, devnull from os.path import join from warnings import catch_warnings, simplefilter @@ -191,7 +192,7 @@ class SbbBinarizer: if image_path is not None: image = cv2.imread(image) self.start_new_session() - list_of_model_files = listdir(self.model_dir) + list_of_model_files = glob('%s/*.h5' % self.model_dir) img_last = 0 for model_in in list_of_model_files: From c4b63fca47d9db72c2c44700946b7bdbe439540c Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 23 Oct 2020 13:51:52 +0200 Subject: [PATCH 3/4] :bug: typo: comment{,s}, fix #8 --- sbb_binarize/ocrd_cli.py | 4 ++-- sbb_binarize/sbb_binarize.py | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index 098da9c..9535391 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -57,7 +57,7 @@ class SbbBinarizeProcessor(Processor): oplevel = self.parameter['operation_level'] model_path = self.parameter['model'] # pylint: disable=attribute-defined-outside-init - binarizer = SbbBinarizer(model_dir=model_path) + binarizer = SbbBinarizer(model_dir=model_path, logger=LOG) for n, input_file in enumerate(self.input_files): file_id = make_file_id(input_file, self.output_file_grp) @@ -77,7 +77,7 @@ class SbbBinarizeProcessor(Processor): file_id + '.IMG-BIN', page_id=input_file.pageId, file_grp=self.output_file_grp) - page.add_AlternativeImage(AlternativeImageType(filename=bin_image_path, comment='%s,binarized' % page_xywh['features'])) + page.add_AlternativeImage(AlternativeImageType(filename=bin_image_path, comments='%s,binarized' % page_xywh['features'])) elif oplevel == 'region': regions = page.get_AllRegions(['Text', 'Table'], depth=1) diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py index a664d6d..dc769dc 100644 --- a/sbb_binarize/sbb_binarize.py +++ b/sbb_binarize/sbb_binarize.py @@ -17,13 +17,16 @@ from keras.models import load_model sys.stderr = stderr import tensorflow as tf +import logging + def resize_image(img_in, input_height, input_width): return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) class SbbBinarizer: - def __init__(self, model_dir): + def __init__(self, model_dir, logger=None): self.model_dir = model_dir + self.log = logger if logger else logging.getLogger('SbbBinarizer') def start_new_session(self): config = tf.ConfigProto() @@ -193,7 +196,8 @@ class SbbBinarizer: self.start_new_session() list_of_model_files = listdir(self.model_dir) img_last = 0 - for model_in in list_of_model_files: + for n, model_in in enumerate(list_of_model_files): + self.log.info('Predicting with model %s [%s/%s]' % (model_in, n + 1, len(list_of_model_files))) res = self.predict(model_in, image, use_patches) From b10b49c6ce50b08f9abbf38d471e7dc3b3505d6c Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 23 Oct 2020 17:08:34 +0200 Subject: [PATCH 4/4] :package: v0.0.2 --- .gitignore | 2 ++ CHANGELOG.md | 23 +++++++++++++++++++++++ sbb_binarize/ocrd-tool.json | 2 +- 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 CHANGELOG.md diff --git a/.gitignore b/.gitignore index c14b1f1..c7737dc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ *.egg-info __pycache__ +/build +/dist diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..b38da2c --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,23 @@ +Change Log +========== +Versioned according to [Semantic Versioning](http://semver.org/). + +## Unreleased + +## 0.0.2 + +Changed: + + * `SBB_BINARIZE_DATA` can replace `model` parameter, #6 + +Fixed: + + * AlternativeImage/comments now set on page level, #8, #11 + * Only try to load `*.h5` model files, #7, #10 + +## 0.0.1 + +Initial release + + +[0.0.2]: ../../compare/v0.0.1...v0.0.2 diff --git a/sbb_binarize/ocrd-tool.json b/sbb_binarize/ocrd-tool.json index ec5e39d..9ee36cc 100644 --- a/sbb_binarize/ocrd-tool.json +++ b/sbb_binarize/ocrd-tool.json @@ -1,5 +1,5 @@ { - "version": "0.0.1", + "version": "0.0.2", "git_url": "https://github.com/qurator-spk/sbb_binarization", "tools": { "ocrd-sbb-binarize": {