diff --git a/CHANGELOG.md b/CHANGELOG.md index f2811a4..0ec8078 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ Versioned according to [Semantic Versioning](http://semver.org/). ## Unreleased +## [0.0.11] - 2022-10-24 + +Added: + + * Trained models listed in ocrd-tool.json for download with OCR-D resource manager, #53 + ## [0.0.10] - 2022-07-21 Fixed: @@ -71,6 +77,7 @@ Fixed: Initial release +[0.0.11]: ../../compare/v0.0.11...v0.0.10 [0.0.10]: ../../compare/v0.0.10...v0.0.9 [0.0.9]: ../../compare/v0.0.9...v0.0.8 [0.0.8]: ../../compare/v0.0.8...v0.0.7 diff --git a/README.md b/README.md index 168532b..3285263 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ ## Introduction -This tool performs document image binarization using trained models. The method is based on [Calvo-Zaragoza and Gallego, 2018](https://arxiv.org/abs/1706.10241). +This tool performs document image binarization using a trained ResNet50-UNet model. ## Installation @@ -18,10 +18,14 @@ Clone the repository, enter it and run ### Models -Pre-trained models can be downloaded from here: +Pre-trained models in `HDF5` format can be downloaded from here: https://qurator-data.de/sbb_binarization/ +We also provide a Tensorflow `saved_model` via Huggingface: + +https://huggingface.co/SBB/sbb_binarization + ## Usage ```sh @@ -31,7 +35,9 @@ sbb_binarize \ ``` -Example +Images containing a lot of border noise (black pixels) should be cropped beforehand to improve the quality of results. 
+ +### Example ```sh sbb_binarize -m /path/to/models/ myimage.tif myimage-bin.tif diff --git a/requirements.txt b/requirements.txt index 1f6e5c8..b6fb627 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ numpy setuptools >= 41 opencv-python-headless -ocrd >= 2.22.3 +ocrd >= 2.38.0 tensorflow >= 2.4.0 mpire \ No newline at end of file diff --git a/sbb_binarize/ocrd-tool.json b/sbb_binarize/ocrd-tool.json index 158cb07..9148309 100644 --- a/sbb_binarize/ocrd-tool.json +++ b/sbb_binarize/ocrd-tool.json @@ -1,5 +1,5 @@ { - "version": "0.0.10", + "version": "0.0.11", "git_url": "https://github.com/qurator-spk/sbb_binarization", "tools": { "ocrd-sbb-binarize": { @@ -17,13 +17,31 @@ "description": "PAGE XML hierarchy level to operate on" }, "model": { - "description": "Directory containing HDF5 models. Can be an absolute path or a path relative to the current working directory or $SBB_BINARIZE_DATA environment variable (if set)", + "description": "Directory containing HDF5 or SavedModel/ProtoBuf models. 
Can be an absolute path or a path relative to the OCR-D resource location, the current working directory or the $SBB_BINARIZE_DATA environment variable (if set)", "type": "string", "format": "uri", "content-type": "text/directory", "required": true } - } + }, + "resources": [ + { + "url": "https://github.com/apacha/sbb_binarization/releases/download/pre-trained-models/model_2020_01_16.zip", + "name": "default", + "type": "archive", + "path_in_archive": "model_2020_01_16", + "size": 562917559, + "description": "default models provided by github.com/qurator-spk" + }, + { + "url": "https://github.com/apacha/sbb_binarization/releases/download/pre-trained-models/model_2021_03_09.zip", + "name": "default-2021-03-09", + "type": "archive", + "path_in_archive": ".", + "size": 133693693, + "description": "updated default models provided by github.com/qurator-spk" + } + ] } } } diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index 57438d3..44a001f 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -30,6 +30,10 @@ def cv2pil(img): def pil2cv(img): # from ocrd/workspace.py + if img.mode in ('LA', 'RGBA'): + newimg = Image.new(img.mode[:-1], img.size, 'white') + newimg.paste(img, mask=img.getchannel('A')) + img = newimg color_conversion = cv2.COLOR_GRAY2BGR if img.mode in ('1', 'L') else cv2.COLOR_RGB2BGR pil_as_np_array = np.array(img).astype('uint8') if img.mode == '1' else np.array(img) return cv2.cvtColor(pil_as_np_array, color_conversion) @@ -106,7 +110,7 @@ class SbbBinarizeProcessor(Processor): if oplevel == 'page': LOG.info("Binarizing on 'page' level in page '%s'", page_id) - bin_image = cv2pil(self.binarizer.run(image=pil2cv(page_image), use_patches=True)) + bin_image = cv2pil(self.binarizer.run(image=pil2cv(page_image))) # update METS (add the image file): bin_image_path = self.workspace.save_image_file(bin_image, file_id + '.IMG-BIN', @@ -120,7 +124,7 @@ class SbbBinarizeProcessor(Processor): LOG.warning("Page '%s' contains no 
text/table regions", page_id) for region in regions: region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized') - region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), use_patches=True)) + region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image))) region_image_bin_path = self.workspace.save_image_file( region_image_bin, "%s_%s.IMG-BIN" % (file_id, region.id), @@ -135,7 +139,7 @@ class SbbBinarizeProcessor(Processor): LOG.warning("Page '%s' contains no text lines", page_id) for region_id, line in region_line_tuples: line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') - line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), use_patches=True)) + line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image))) line_image_bin_path = self.workspace.save_image_file( line_image_bin, "%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id),