From 7c3f2176f7ecca60528efe7fadf96cc85d2c9927 Mon Sep 17 00:00:00 2001 From: vahid Date: Tue, 11 Oct 2022 19:18:40 +0200 Subject: [PATCH] issue #45: the patches option is removed, so documents are always processed in patches, because the no-patches mode is not desired for this tool --- sbb_binarize/cli.py | 5 +- sbb_binarize/ocrd_cli.py | 6 +- sbb_binarize/sbb_binarize.py | 206 ++++++++++++++++------------------- 3 files changed, 100 insertions(+), 117 deletions(-) diff --git a/sbb_binarize/cli.py b/sbb_binarize/cli.py index 0077bef..ddfbde6 100644 --- a/sbb_binarize/cli.py +++ b/sbb_binarize/cli.py @@ -7,9 +7,8 @@ from .sbb_binarize import SbbBinarizer @command() @version_option() -@option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.') @option('--model-dir', '-m', type=types.Path(exists=True, file_okay=False), required=True, help='directory containing models for prediction') @argument('input_image') @argument('output_image') -def main(patches, model_dir, input_image, output_image): - SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image) +def main(model_dir, input_image, output_image): + SbbBinarizer(model_dir).run(image_path=input_image, save=output_image) diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index 9737bad..44a001f 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -110,7 +110,7 @@ class SbbBinarizeProcessor(Processor): if oplevel == 'page': LOG.info("Binarizing on 'page' level in page '%s'", page_id) - bin_image = cv2pil(self.binarizer.run(image=pil2cv(page_image), use_patches=True)) + bin_image = cv2pil(self.binarizer.run(image=pil2cv(page_image))) # update METS (add the image file): bin_image_path = self.workspace.save_image_file(bin_image, file_id + '.IMG-BIN', @@ -124,7 +124,7 @@ class SbbBinarizeProcessor(Processor): LOG.warning("Page '%s' contains no text/table regions", page_id) for region in regions: 
region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized') - region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), use_patches=True)) + region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image))) region_image_bin_path = self.workspace.save_image_file( region_image_bin, "%s_%s.IMG-BIN" % (file_id, region.id), @@ -139,7 +139,7 @@ class SbbBinarizeProcessor(Processor): LOG.warning("Page '%s' contains no text lines", page_id) for region_id, line in region_line_tuples: line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') - line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), use_patches=True)) + line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image))) line_image_bin_path = self.workspace.save_image_file( line_image_bin, "%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id), diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py index 247d54b..7016020 100644 --- a/sbb_binarize/sbb_binarize.py +++ b/sbb_binarize/sbb_binarize.py @@ -62,7 +62,7 @@ class SbbBinarizer: n_classes = model.layers[len(model.layers)-1].output_shape[3] return model, model_height, model_width, n_classes - def predict(self, model_in, img, use_patches): + def predict(self, model_in, img): tensorflow_backend.set_session(self.session) model, model_height, model_width, n_classes = model_in @@ -101,152 +101,136 @@ class SbbBinarizer: img = np.copy(img_padded) - - - - if use_patches: - - margin = int(0.1 * model_width) - width_mid = model_width - 2 * margin - height_mid = model_height - 2 * margin + margin = int(0.1 * model_width) + width_mid = model_width - 2 * margin + height_mid = model_height - 2 * margin - img = img / float(255.0) - img_h = img.shape[0] - img_w = img.shape[1] + img = img / float(255.0) - prediction_true = np.zeros((img_h, img_w, 3)) - mask_true = np.zeros((img_h, img_w)) - nxf = img_w / float(width_mid) - 
nyf = img_h / float(height_mid) + img_h = img.shape[0] + img_w = img.shape[1] - if nxf > int(nxf): - nxf = int(nxf) + 1 - else: - nxf = int(nxf) + prediction_true = np.zeros((img_h, img_w, 3)) + mask_true = np.zeros((img_h, img_w)) + nxf = img_w / float(width_mid) + nyf = img_h / float(height_mid) - if nyf > int(nyf): - nyf = int(nyf) + 1 - else: - nyf = int(nyf) - - for i in range(nxf): - for j in range(nyf): + if nxf > int(nxf): + nxf = int(nxf) + 1 + else: + nxf = int(nxf) - if i == 0: - index_x_d = i * width_mid - index_x_u = index_x_d + model_width - elif i > 0: - index_x_d = i * width_mid - index_x_u = index_x_d + model_width + if nyf > int(nyf): + nyf = int(nyf) + 1 + else: + nyf = int(nyf) - if j == 0: - index_y_d = j * height_mid - index_y_u = index_y_d + model_height - elif j > 0: - index_y_d = j * height_mid - index_y_u = index_y_d + model_height + for i in range(nxf): + for j in range(nyf): - if index_x_u > img_w: - index_x_u = img_w - index_x_d = img_w - model_width - if index_y_u > img_h: - index_y_u = img_h - index_y_d = img_h - model_height + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + model_width + elif i > 0: + index_x_d = i * width_mid + index_x_u = index_x_d + model_width - img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + if j == 0: + index_y_d = j * height_mid + index_y_u = index_y_d + model_height + elif j > 0: + index_y_d = j * height_mid + index_y_u = index_y_d + model_height - label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - model_width + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - model_height - seg = np.argmax(label_p_pred, axis=3)[0] + img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], 
img_patch.shape[2])) - if i == 0 and j == 0: - seg_color = seg_color[0:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :] - seg = seg[0:seg.shape[0] - margin, 0:seg.shape[1] - margin] + seg = np.argmax(label_p_pred, axis=3)[0] - mask_true[index_y_d + 0:index_y_u - margin, index_x_d + 0:index_x_u - margin] = seg - prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + 0:index_x_u - margin, :] = seg_color + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - elif i == nxf-1 and j == nyf-1: - seg_color = seg_color[margin:seg_color.shape[0] - 0, margin:seg_color.shape[1] - 0, :] - seg = seg[margin:seg.shape[0] - 0, margin:seg.shape[1] - 0] + if i == 0 and j == 0: + seg_color = seg_color[0:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :] + seg = seg[0:seg.shape[0] - margin, 0:seg.shape[1] - margin] - mask_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - 0] = seg - prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - 0, :] = seg_color + mask_true[index_y_d + 0:index_y_u - margin, index_x_d + 0:index_x_u - margin] = seg + prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + 0:index_x_u - margin, :] = seg_color - elif i == 0 and j == nyf-1: - seg_color = seg_color[margin:seg_color.shape[0] - 0, 0:seg_color.shape[1] - margin, :] - seg = seg[margin:seg.shape[0] - 0, 0:seg.shape[1] - margin] + elif i == nxf-1 and j == nyf-1: + seg_color = seg_color[margin:seg_color.shape[0] - 0, margin:seg_color.shape[1] - 0, :] + seg = seg[margin:seg.shape[0] - 0, margin:seg.shape[1] - 0] - mask_true[index_y_d + margin:index_y_u - 0, index_x_d + 0:index_x_u - margin] = seg - prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + 0:index_x_u - margin, :] = seg_color + mask_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - 0] = seg + prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - 0, :] = seg_color - elif i == nxf-1 and j == 0: - seg_color 
= seg_color[0:seg_color.shape[0] - margin, margin:seg_color.shape[1] - 0, :] - seg = seg[0:seg.shape[0] - margin, margin:seg.shape[1] - 0] + elif i == 0 and j == nyf-1: + seg_color = seg_color[margin:seg_color.shape[0] - 0, 0:seg_color.shape[1] - margin, :] + seg = seg[margin:seg.shape[0] - 0, 0:seg.shape[1] - margin] - mask_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - 0] = seg - prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - 0, :] = seg_color + mask_true[index_y_d + margin:index_y_u - 0, index_x_d + 0:index_x_u - margin] = seg + prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + 0:index_x_u - margin, :] = seg_color - elif i == 0 and j != 0 and j != nyf-1: - seg_color = seg_color[margin:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :] - seg = seg[margin:seg.shape[0] - margin, 0:seg.shape[1] - margin] + elif i == nxf-1 and j == 0: + seg_color = seg_color[0:seg_color.shape[0] - margin, margin:seg_color.shape[1] - 0, :] + seg = seg[0:seg.shape[0] - margin, margin:seg.shape[1] - 0] - mask_true[index_y_d + margin:index_y_u - margin, index_x_d + 0:index_x_u - margin] = seg - prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + 0:index_x_u - margin, :] = seg_color + mask_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - 0] = seg + prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - 0, :] = seg_color - elif i == nxf-1 and j != 0 and j != nyf-1: - seg_color = seg_color[margin:seg_color.shape[0] - margin, margin:seg_color.shape[1] - 0, :] - seg = seg[margin:seg.shape[0] - margin, margin:seg.shape[1] - 0] + elif i == 0 and j != 0 and j != nyf-1: + seg_color = seg_color[margin:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :] + seg = seg[margin:seg.shape[0] - margin, 0:seg.shape[1] - margin] - mask_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - 0] = seg - prediction_true[index_y_d + 
margin:index_y_u - margin, index_x_d + margin:index_x_u - 0, :] = seg_color + mask_true[index_y_d + margin:index_y_u - margin, index_x_d + 0:index_x_u - margin] = seg + prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + 0:index_x_u - margin, :] = seg_color - elif i != 0 and i != nxf-1 and j == 0: - seg_color = seg_color[0:seg_color.shape[0] - margin, margin:seg_color.shape[1] - margin, :] - seg = seg[0:seg.shape[0] - margin, margin:seg.shape[1] - margin] + elif i == nxf-1 and j != 0 and j != nyf-1: + seg_color = seg_color[margin:seg_color.shape[0] - margin, margin:seg_color.shape[1] - 0, :] + seg = seg[margin:seg.shape[0] - margin, margin:seg.shape[1] - 0] - mask_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - margin] = seg - prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color + mask_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - 0] = seg + prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - 0, :] = seg_color - elif i != 0 and i != nxf-1 and j == nyf-1: - seg_color = seg_color[margin:seg_color.shape[0] - 0, margin:seg_color.shape[1] - margin, :] - seg = seg[margin:seg.shape[0] - 0, margin:seg.shape[1] - margin] + elif i != 0 and i != nxf-1 and j == 0: + seg_color = seg_color[0:seg_color.shape[0] - margin, margin:seg_color.shape[1] - margin, :] + seg = seg[0:seg.shape[0] - margin, margin:seg.shape[1] - margin] - mask_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - margin] = seg - prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - margin, :] = seg_color + mask_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - margin] = seg + prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color - else: - seg_color = seg_color[margin:seg_color.shape[0] - margin, margin:seg_color.shape[1] - margin, :] - seg = 
seg[margin:seg.shape[0] - margin, margin:seg.shape[1] - margin] + elif i != 0 and i != nxf-1 and j == nyf-1: + seg_color = seg_color[margin:seg_color.shape[0] - 0, margin:seg_color.shape[1] - margin, :] + seg = seg[margin:seg.shape[0] - 0, margin:seg.shape[1] - margin] - mask_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin] = seg - prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color - - - - prediction_true = prediction_true[index_start_h: index_start_h+img_org_h, index_start_w: index_start_w+img_org_w,:] - prediction_true = prediction_true.astype(np.uint8) + mask_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - margin] = seg + prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - margin, :] = seg_color - else: - img_h_page = img.shape[0] - img_w_page = img.shape[1] - img = img / float(255.0) - img = resize_image(img, model_height, model_width) + else: + seg_color = seg_color[margin:seg_color.shape[0] - margin, margin:seg_color.shape[1] - margin, :] + seg = seg[margin:seg.shape[0] - margin, margin:seg.shape[1] - margin] - label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) + mask_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin] = seg + prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color + + + + prediction_true = prediction_true[index_start_h: index_start_h+img_org_h, index_start_w: index_start_w+img_org_w,:] + prediction_true = prediction_true.astype(np.uint8) - seg = np.argmax(label_p_pred, axis=3)[0] - seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - prediction_true = resize_image(seg_color, img_h_page, img_w_page) - prediction_true = prediction_true.astype(np.uint8) return prediction_true[:,:,0] - def run(self, image=None, image_path=None, save=None, use_patches=False): + def run(self, 
image=None, image_path=None, save=None): if (image is not None and image_path is not None) or \ (image is None and image_path is None): raise ValueError("Must pass either a opencv2 image or an image_path") @@ -256,7 +240,7 @@ class SbbBinarizer: for n, (model, model_file) in enumerate(zip(self.models, self.model_files)): self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files))) - res = self.predict(model, image, use_patches) + res = self.predict(model, image) img_fin = np.zeros((res.shape[0], res.shape[1], 3)) res[:, :][res[:, :] == 0] = 2