From 87ae6d11a91a89bb26a3e559725f99310fab9b9c Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 25 Mar 2025 23:01:12 +0100 Subject: [PATCH] pep8: whitespaces around operators --- src/eynollah/cli.py | 51 +- src/eynollah/eynollah.py | 2921 +++++++++++++------------ src/eynollah/ocrd_cli_binarization.py | 39 +- src/eynollah/plot.py | 90 +- src/eynollah/sbb_binarize.py | 271 +-- src/eynollah/utils/__init__.py | 1447 ++++++------ src/eynollah/utils/contour.py | 33 +- src/eynollah/utils/drop_capitals.py | 173 +- src/eynollah/utils/marginals.py | 229 +- src/eynollah/utils/pil_cv2.py | 3 +- src/eynollah/utils/rotate.py | 9 +- src/eynollah/utils/separate_lines.py | 560 ++--- src/eynollah/writer.py | 212 +- 13 files changed, 3131 insertions(+), 2907 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index f07c0a0..8b9ccd4 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -17,21 +17,18 @@ def main(): help="directory of GT page-xml files", type=click.Path(exists=True, file_okay=False), ) - @click.option( "--dir_out_modal_image", "-domi", help="directory where ground truth images would be written", type=click.Path(exists=True, file_okay=False), ) - @click.option( "--dir_out_classes", "-docl", help="directory where ground truth classes would be written", type=click.Path(exists=True, file_okay=False), ) - @click.option( "--input_height", "-ih", @@ -47,18 +44,17 @@ def main(): "-min", help="min area size of regions considered for reading order training.", ) - -def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, input_height, input_width, min_area_size): +def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, input_height, input_width, + min_area_size): xml_files_ind = os.listdir(dir_xml) - -@main.command() -@click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.') - -@click.option('--model_dir', '-m', type=click.Path(exists=True, file_okay=False), required=True, help='directory containing models for prediction') +@main.command() +@click.option('--patches/--no-patches', default=True, + help='by enabling this parameter you let the model to see the image in patches.') +@click.option('--model_dir', '-m', type=click.Path(exists=True, file_okay=False), required=True, + help='directory containing models for prediction') @click.argument('input_image') - @click.argument('output_image') @click.option( "--dir_in", @@ -72,7 +68,6 @@ def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, i help="directory where the binarized images will be written", type=click.Path(exists=True, file_okay=False), ) - def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out): if not dir_out and dir_in: print("Error: You used -di but did not set -do") @@ -80,9 +75,8 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) elif dir_out and not dir_in: print("Error: You used -do to write out binarized images but have not set -di") sys.exit(1) - SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image, dir_in=dir_in, dir_out=dir_out) - - + SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image, dir_in=dir_in, + dir_out=dir_out) @main.command() @@ -92,7 +86,6 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) help="image filename", type=click.Path(exists=True, dir_okay=False), ) - @click.option( "--out", "-o", @@ -261,15 +254,19 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), help="Override log level globally to this", ) - -def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level): +def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, + save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, + input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, + num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level): initLogging() if log_level: getLogger('eynollah').setLevel(getLevelName(log_level)) - if not enable_plotting and (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement): + if not enable_plotting and ( + save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement): print("Error: You used one of -sl, -sd, -sa, -sp, -si or -ae but did not enable plotting with -ep") sys.exit(1) - elif enable_plotting and not (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement): + elif enable_plotting and not ( + save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement): print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa, -sp, -si or -ae") sys.exit(1) if textline_light and not light_version: @@ -277,8 +274,10 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ sys.exit(1) if light_version and not textline_light: print('Error: You used -light without -tll. Light version need light textline to be enabled.') - if extract_only_images and (allow_enhancement or allow_scaling or light_version or curved_line or textline_light or full_layout or tables or right2left or headers_off) : - print('Error: You used -eoi which can not be enabled alongside light_version -light or allow_scaling -as or allow_enhancement -ae or curved_line -cl or textline_light -tll or full_layout -fl or tables -tab or right2left -r2l or headers_off -ho') + if extract_only_images and ( + allow_enhancement or allow_scaling or light_version or curved_line or textline_light or full_layout or tables or right2left or headers_off): + print( + 'Error: You used -eoi which can not be enabled alongside light_version -light or allow_scaling -as or allow_enhancement -ae or curved_line -cl or textline_light -tll or full_layout -fl or tables -tab or right2left -r2l or headers_off -ho') sys.exit(1) eynollah = Eynollah( image_filename=image, @@ -315,8 +314,8 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ else: pcgts = eynollah.run() eynollah.writer.write_pagexml(pcgts) - - + + @main.command() @click.option( "--dir_in", @@ -368,8 +367,8 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), help="Override log level globally to this", ) - -def ocr(dir_in, out, dir_xmls, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, log_level): +def ocr(dir_in, out, dir_xmls, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, + log_level): if log_level: setOverrideLogLevel(log_level) initLogging() diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 612303a..535aee8 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -26,7 +26,7 @@ from PIL import Image import torch from difflib import SequenceMatcher as sq from transformers import VisionEncoderDecoderModel -from numba import cuda +from numba import cuda import copy from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d @@ -38,6 +38,7 @@ sys.stderr = open(os.devnull, "w") import tensorflow as tf from tensorflow.python.keras import backend as K from tensorflow.keras.models import load_model + sys.stderr = stderr tf.get_logger().setLevel("ERROR") warnings.filterwarnings("ignore") @@ -104,14 +105,14 @@ from .writer import EynollahXmlWriter MIN_AREA_REGION = 0.000001 SLOPE_THRESHOLD = 0.13 -RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45: +RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45: DPI_THRESHOLD = 298 MAX_SLOPE = 999 KERNEL = np.ones((5, 5), np.uint8) projection_dim = 64 patch_size = 1 -num_patches =21*21#14*14#28*28#14*14#28*28 +num_patches = 21 * 21 #14*14#28*28#14*14#28*28 class Patches(layers.Layer): @@ -131,15 +132,15 @@ class Patches(layers.Layer): patch_dims = patches.shape[-1] patches = tf.reshape(patches, [batch_size, -1, patch_dims]) return patches - def get_config(self): + def get_config(self): config = super().get_config().copy() config.update({ 'patch_size': self.patch_size, }) return config - - + + class PatchEncoder(layers.Layer): def __init__(self, **kwargs): super(PatchEncoder, self).__init__() @@ -153,8 +154,8 @@ class PatchEncoder(layers.Layer): positions = tf.range(start=0, limit=self.num_patches, delta=1) encoded = self.projection(patch) + self.position_embedding(positions) return encoded - def get_config(self): + def get_config(self): config = super().get_config().copy() config.update({ 'num_patches': self.num_patches, @@ -166,40 +167,40 @@ class PatchEncoder(layers.Layer): class Eynollah: def __init__( - self, - dir_models, - image_filename=None, - image_pil=None, - image_filename_stem=None, - overwrite=False, - dir_out=None, - dir_in=None, - dir_of_cropped_images=None, - extract_only_images=False, - dir_of_layout=None, - dir_of_deskewed=None, - dir_of_all=None, - dir_save_page=None, - enable_plotting=False, - allow_enhancement=False, - curved_line=False, - textline_light=False, - full_layout=False, - tables=False, - right2left=False, - input_binary=False, - allow_scaling=False, - headers_off=False, - light_version=False, - ignore_page_extraction=False, - reading_order_machine_based=False, - do_ocr=False, - num_col_upper=None, - num_col_lower=None, - skip_layout_and_reading_order = False, - override_dpi=None, - logger=None, - pcgts=None, + self, + dir_models, + image_filename=None, + image_pil=None, + image_filename_stem=None, + overwrite=False, + dir_out=None, + dir_in=None, + dir_of_cropped_images=None, + extract_only_images=False, + dir_of_layout=None, + dir_of_deskewed=None, + dir_of_all=None, + dir_save_page=None, + enable_plotting=False, + allow_enhancement=False, + curved_line=False, + textline_light=False, + full_layout=False, + tables=False, + right2left=False, + input_binary=False, + allow_scaling=False, + headers_off=False, + light_version=False, + ignore_page_extraction=False, + reading_order_machine_based=False, + do_ocr=False, + num_col_upper=None, + num_col_lower=None, + skip_layout_and_reading_order=False, + override_dpi=None, + logger=None, + pcgts=None, ): if skip_layout_and_reading_order: textline_light = True @@ -219,9 +220,9 @@ class Eynollah: self.dir_save_page = dir_save_page self.reading_order_machine_based = reading_order_machine_based self.dir_of_deskewed = dir_of_deskewed - self.dir_of_deskewed = dir_of_deskewed - self.dir_of_cropped_images=dir_of_cropped_images - self.dir_of_layout=dir_of_layout + self.dir_of_deskewed = dir_of_deskewed + self.dir_of_cropped_images = dir_of_cropped_images + self.dir_of_layout = dir_of_layout self.enable_plotting = enable_plotting self.allow_enhancement = allow_enhancement self.curved_line = curved_line @@ -259,7 +260,7 @@ class Eynollah: dir_out=self.dir_out, image_filename=self.image_filename, curved_line=self.curved_line, - textline_light = self.textline_light, + textline_light=self.textline_light, pcgts=pcgts) self.logger = logger if logger else getLogger('eynollah') # for parallelization of CPU-intensive tasks: @@ -312,22 +313,22 @@ class Eynollah: self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024" if self.ocr: self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" - + if self.tables: if self.light_version: self.model_table_dir = dir_models + "/modelens_table_0t4_201124" else: self.model_table_dir = dir_models + "/eynollah-tables_20210319" - + self.models = {} - + if dir_in: # as in start_new_session: config = tf.compat.v1.ConfigProto() config.gpu_options.allow_growth = True session = tf.compat.v1.Session(config=config) set_session(session) - + self.model_page = self.our_load_model(self.model_page_dir) self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) self.model_bin = self.our_load_model(self.model_dir_of_binarization) @@ -354,8 +355,8 @@ class Eynollah: self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten") if self.tables: self.model_table = self.our_load_model(self.model_table_dir) - - self.ls_imgs = os.listdir(self.dir_in) + + self.ls_imgs = os.listdir(self.dir_in) def _cache_images(self, image_filename=None, image_pil=None): ret = {} @@ -373,7 +374,7 @@ class Eynollah: else: self.dpi = check_dpi(image_pil) ret['img_grayscale'] = cv2.cvtColor(ret['img'], cv2.COLOR_BGR2GRAY) - for prefix in ('', '_grayscale'): + for prefix in ('', '_grayscale'): ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8) return ret @@ -381,7 +382,7 @@ class Eynollah: t_c = time.time() self._imgs = self._cache_images(image_filename=image_filename) self.image_filename = image_filename - + self.plotter = None if not self.enable_plotting else EynollahPlotter( dir_out=self.dir_out, dir_of_all=self.dir_of_all, @@ -390,12 +391,12 @@ class Eynollah: dir_of_cropped_images=self.dir_of_cropped_images, dir_of_layout=self.dir_of_layout, image_filename_stem=Path(Path(image_filename).name).stem) - + self.writer = EynollahXmlWriter( dir_out=self.dir_out, image_filename=self.image_filename, curved_line=self.curved_line, - textline_light = self.textline_light, + textline_light=self.textline_light, pcgts=self.pcgts) def imread(self, grayscale=False, uint8=True): @@ -405,7 +406,7 @@ class Eynollah: if uint8: key += '_uint8' return self._imgs[key].copy() - + def isNaN(self, num): return num != num @@ -461,49 +462,49 @@ class Eynollah: if i == 0 and j == 0: prediction_true[index_y_d + 0:index_y_u - margin, - index_x_d + 0:index_x_u - margin] = \ - seg[0:-margin or None, - 0:-margin or None] + index_x_d + 0:index_x_u - margin] = \ + seg[0:-margin or None, + 0:-margin or None] elif i == nxf - 1 and j == nyf - 1: prediction_true[index_y_d + margin:index_y_u - 0, - index_x_d + margin:index_x_u - 0] = \ - seg[margin:, - margin:] + index_x_d + margin:index_x_u - 0] = \ + seg[margin:, + margin:] elif i == 0 and j == nyf - 1: prediction_true[index_y_d + margin:index_y_u - 0, - index_x_d + 0:index_x_u - margin] = \ - seg[margin:, - 0:-margin or None] + index_x_d + 0:index_x_u - margin] = \ + seg[margin:, + 0:-margin or None] elif i == nxf - 1 and j == 0: prediction_true[index_y_d + 0:index_y_u - margin, - index_x_d + margin:index_x_u - 0] = \ - seg[0:-margin or None, - margin:] + index_x_d + margin:index_x_u - 0] = \ + seg[0:-margin or None, + margin:] elif i == 0 and j != 0 and j != nyf - 1: prediction_true[index_y_d + margin:index_y_u - margin, - index_x_d + 0:index_x_u - margin] = \ - seg[margin:-margin or None, - 0:-margin or None] + index_x_d + 0:index_x_u - margin] = \ + seg[margin:-margin or None, + 0:-margin or None] elif i == nxf - 1 and j != 0 and j != nyf - 1: prediction_true[index_y_d + margin:index_y_u - margin, - index_x_d + margin:index_x_u - 0] = \ - seg[margin:-margin or None, - margin:] + index_x_d + margin:index_x_u - 0] = \ + seg[margin:-margin or None, + margin:] elif i != 0 and i != nxf - 1 and j == 0: prediction_true[index_y_d + 0:index_y_u - margin, - index_x_d + margin:index_x_u - margin] = \ - seg[0:-margin or None, - margin:-margin or None] + index_x_d + margin:index_x_u - margin] = \ + seg[0:-margin or None, + margin:-margin or None] elif i != 0 and i != nxf - 1 and j == nyf - 1: prediction_true[index_y_d + margin:index_y_u - 0, - index_x_d + margin:index_x_u - margin] = \ - seg[margin:, - margin:-margin or None] + index_x_d + margin:index_x_u - margin] = \ + seg[margin:, + margin:-margin or None] else: prediction_true[index_y_d + margin:index_y_u - margin, - index_x_d + margin:index_x_u - margin] = \ - seg[margin:-margin or None, - margin:-margin or None] + index_x_d + margin:index_x_u - margin] = \ + seg[margin:-margin or None, + margin:-margin or None] prediction_true = prediction_true.astype(int) return prediction_true @@ -558,7 +559,7 @@ class Eynollah: num_column_is_classified = True return img_new, num_column_is_classified - + def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred): self.logger.debug("enter calculate_width_height_by_columns") if num_col == 1: @@ -609,7 +610,7 @@ class Eynollah: img = self.imread() _, page_coord = self.early_page_for_num_of_column_classification(img) - + if not self.dir_in: self.model_classifier, _ = self.start_new_session_and_model(self.model_dir_of_col_classifier) @@ -622,7 +623,7 @@ class Eynollah: else: img_1ch = self.imread(grayscale=True, uint8=False) width_early = img_1ch.shape[1] - img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + img_1ch = img_1ch[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]] # plt.imshow(img_1ch) # plt.show() @@ -656,24 +657,24 @@ class Eynollah: self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5) - prediction_bin = 255 * (prediction_bin[:,:,0]==0) + prediction_bin = 255 * (prediction_bin[:, :, 0] == 0) prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8) - img= np.copy(prediction_bin) + img = np.copy(prediction_bin) img_bin = prediction_bin else: img = self.imread() img_bin = None - + width_early = img.shape[1] t1 = time.time() _, page_coord = self.early_page_for_num_of_column_classification(img_bin) - - self.image_page_org_size = img[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3], :] + + self.image_page_org_size = img[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3], :] self.page_coord = page_coord - + if not self.dir_in: self.model_classifier, _ = self.start_new_session_and_model(self.model_dir_of_col_classifier) - + if self.num_col_upper and not self.num_col_lower: num_col = self.num_col_upper label_p_pred = [np.ones(6)] @@ -689,7 +690,7 @@ class Eynollah: else: img_1ch = self.imread(grayscale=True) width_early = img_1ch.shape[1] - img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + img_1ch = img_1ch[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]] img_1ch = img_1ch / 255.0 img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) @@ -700,7 +701,7 @@ class Eynollah: label_p_pred = self.model_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 - elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower): + elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper != self.num_col_lower): if self.input_binary: img_in = np.copy(img) img_in = img_in / 255.0 @@ -709,7 +710,7 @@ class Eynollah: else: img_1ch = self.imread(grayscale=True) width_early = img_1ch.shape[1] - img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + img_1ch = img_1ch[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]] img_1ch = img_1ch / 255.0 img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) @@ -720,7 +721,7 @@ class Eynollah: label_p_pred = self.model_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 - + if num_col > self.num_col_upper: num_col = self.num_col_upper label_p_pred = [np.ones(6)] @@ -730,11 +731,11 @@ class Eynollah: else: num_col = self.num_col_upper label_p_pred = [np.ones(6)] - + self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5)) if not self.extract_only_images: if dpi < DPI_THRESHOLD: - if light_version and num_col in (1,2): + if light_version and num_col in (1, 2): img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2( img, num_col, width_early, label_p_pred) else: @@ -746,7 +747,7 @@ class Eynollah: image_res = self.predict_enhancement(img_new) is_image_enhanced = True else: - if light_version and num_col in (1,2): + if light_version and num_col in (1, 2): img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2( img, num_col, width_early, label_p_pred) image_res = np.copy(img_new) @@ -844,7 +845,7 @@ class Eynollah: try: model = load_model(model_dir, compile=False) except: - model = load_model(model_dir , compile=False, custom_objects={ + model = load_model(model_dir, compile=False, custom_objects={ "PatchEncoder": PatchEncoder, "Patches": Patches}) self.models[model_dir] = model @@ -868,14 +869,14 @@ class Eynollah: label_p_pred = model.predict(img[np.newaxis], verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] - + if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[0,:,:,2] - - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 - - seg[seg_art==1]=2 + seg_art = label_p_pred[0, :, :, 2] + + seg_art[seg_art < 0.2] = 0 + seg_art[seg_art > 0] = 1 + + seg[seg_art == 1] = 2 seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8) return prediction_true @@ -937,39 +938,39 @@ class Eynollah: list_y_d.append(index_y_d) list_y_u.append(index_y_u) - img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + img_patch[batch_indexer, :, :, :] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] batch_indexer += 1 if (batch_indexer == n_batch_inference or - # last batch - i == nxf - 1 and j == nyf - 1): + # last batch + i == nxf - 1 and j == nyf - 1): self.logger.debug("predicting patches on %s", str(img_patch.shape)) label_p_pred = model.predict(img_patch, verbose=0) seg = np.argmax(label_p_pred, axis=3) if thresholding_for_some_classes_in_light_version: - seg_not_base = label_p_pred[:,:,:,4] - seg_not_base[seg_not_base>0.03] =1 - seg_not_base[seg_not_base<1] =0 + seg_not_base = label_p_pred[:, :, :, 4] + seg_not_base[seg_not_base > 0.03] = 1 + seg_not_base[seg_not_base < 1] = 0 - seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 + seg_line = label_p_pred[:, :, :, 3] + seg_line[seg_line > 0.1] = 1 + seg_line[seg_line < 1] = 0 - seg_background = label_p_pred[:,:,:,0] - seg_background[seg_background>0.25] =1 - seg_background[seg_background<1] =0 + seg_background = label_p_pred[:, :, :, 0] + seg_background[seg_background > 0.25] = 1 + seg_background[seg_background < 1] = 0 - seg[seg_not_base==1]=4 - seg[seg_background==1]=0 - seg[(seg_line==1) & (seg==0)]=3 + seg[seg_not_base == 1] = 4 + seg[seg_background == 1] = 0 + seg[(seg_line == 1) & (seg == 0)] = 3 if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] + seg_art = label_p_pred[:, :, :, 2] - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 + seg_art[seg_art < 0.2] = 0 + seg_art[seg_art > 0] = 1 - seg[seg_art==1]=2 + seg[seg_art == 1] = 2 indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): @@ -983,61 +984,60 @@ class Eynollah: if i_batch == 0 and j_batch == 0: prediction_true[index_y_d_in + 0:index_y_u_in - margin, - index_x_d_in + 0:index_x_u_in - margin] = \ - seg_in[0:-margin or None, - 0:-margin or None, - np.newaxis] + index_x_d_in + 0:index_x_u_in - margin] = \ + seg_in[0:-margin or None, + 0:-margin or None, + np.newaxis] elif i_batch == nxf - 1 and j_batch == nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - 0, - index_x_d_in + margin:index_x_u_in - 0] = \ - seg_in[margin:, - margin:, - np.newaxis] + index_x_d_in + margin:index_x_u_in - 0] = \ + seg_in[margin:, + margin:, + np.newaxis] elif i_batch == 0 and j_batch == nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - 0, - index_x_d_in + 0:index_x_u_in - margin] = \ - seg_in[margin:, - 0:-margin or None, - np.newaxis] + index_x_d_in + 0:index_x_u_in - margin] = \ + seg_in[margin:, + 0:-margin or None, + np.newaxis] elif i_batch == nxf - 1 and j_batch == 0: prediction_true[index_y_d_in + 0:index_y_u_in - margin, - index_x_d_in + margin:index_x_u_in - 0] = \ - seg_in[0:-margin or None, - margin:, - np.newaxis] + index_x_d_in + margin:index_x_u_in - 0] = \ + seg_in[0:-margin or None, + margin:, + np.newaxis] elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - margin, - index_x_d_in + 0:index_x_u_in - margin] = \ - seg_in[margin:-margin or None, - 0:-margin or None, - np.newaxis] + index_x_d_in + 0:index_x_u_in - margin] = \ + seg_in[margin:-margin or None, + 0:-margin or None, + np.newaxis] elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - margin, - index_x_d_in + margin:index_x_u_in - 0] = \ - seg_in[margin:-margin or None, - margin:, - np.newaxis] + index_x_d_in + margin:index_x_u_in - 0] = \ + seg_in[margin:-margin or None, + margin:, + np.newaxis] elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: prediction_true[index_y_d_in + 0:index_y_u_in - margin, - index_x_d_in + margin:index_x_u_in - margin] = \ - seg_in[0:-margin or None, - margin:-margin or None, - np.newaxis] + index_x_d_in + margin:index_x_u_in - margin] = \ + seg_in[0:-margin or None, + margin:-margin or None, + np.newaxis] elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - 0, - index_x_d_in + margin:index_x_u_in - margin] = \ - seg_in[margin:, - margin:-margin or None, - np.newaxis] + index_x_d_in + margin:index_x_u_in - margin] = \ + seg_in[margin:, + margin:-margin or None, + np.newaxis] else: prediction_true[index_y_d_in + margin:index_y_u_in - margin, - index_x_d_in + margin:index_x_u_in - margin] = \ - seg_in[margin:-margin or None, - margin:-margin or None, - np.newaxis] + index_x_d_in + margin:index_x_u_in - margin] = \ + seg_in[margin:-margin or None, + margin:-margin or None, + np.newaxis] indexer_inside_batch += 1 - list_i_s = [] list_j_s = [] list_x_u = [] @@ -1054,34 +1054,34 @@ class Eynollah: return prediction_true def do_padding_with_scale(self, img, scale): - h_n = int(img.shape[0]*scale) - w_n = int(img.shape[1]*scale) - - channel0_avg = int( np.mean(img[:,:,0]) ) - channel1_avg = int( np.mean(img[:,:,1]) ) - channel2_avg = int( np.mean(img[:,:,2]) ) - + h_n = int(img.shape[0] * scale) + w_n = int(img.shape[1] * scale) + + channel0_avg = int(np.mean(img[:, :, 0])) + channel1_avg = int(np.mean(img[:, :, 1])) + channel2_avg = int(np.mean(img[:, :, 2])) + h_diff = img.shape[0] - h_n w_diff = img.shape[1] - w_n - + h_start = int(0.5 * h_diff) w_start = int(0.5 * w_diff) - + img_res = resize_image(img, h_n, w_n) #label_res = resize_image(label, h_n, w_n) - + img_scaled_padded = np.copy(img) - + #label_scaled_padded = np.zeros(label.shape) - - img_scaled_padded[:,:,0] = channel0_avg - img_scaled_padded[:,:,1] = channel1_avg - img_scaled_padded[:,:,2] = channel2_avg - - img_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n,:] = img_res[:,:,:] + + img_scaled_padded[:, :, 0] = channel0_avg + img_scaled_padded[:, :, 1] = channel1_avg + img_scaled_padded[:, :, 2] = channel2_avg + + img_scaled_padded[h_start:h_start + h_n, w_start:w_start + w_n, :] = img_res[:, :, :] #label_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n,:] = label_res[:,:,:] - - return img_scaled_padded#, label_scaled_padded + + return img_scaled_padded #, label_scaled_padded def do_prediction_new_concept_scatter_nd( self, patches, img, model, @@ -1101,18 +1101,18 @@ class Eynollah: label_p_pred = model.predict(img[np.newaxis], verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] - + if thresholding_for_artificial_class_in_light_version: #seg_text = label_p_pred[0,:,:,1] #seg_text[seg_text<0.2] =0 #seg_text[seg_text>0] =1 #seg[seg_text==1]=1 - - seg_art = label_p_pred[0,:,:,4] - seg_art[seg_art<0.2] =0 - seg_art[seg_art>0] =1 - seg[seg_art==1]=4 - + + seg_art = label_p_pred[0, :, :, 4] + seg_art[seg_art < 0.2] = 0 + seg_art[seg_art > 0] = 1 + seg[seg_art == 1] = 4 + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8) return prediction_true @@ -1130,10 +1130,10 @@ class Eynollah: img = img.astype(np.float16) img_h = img.shape[0] img_w = img.shape[1] - + stride_x = img_width_model - 100 stride_y = img_height_model - 100 - + one_tensor = tf.ones_like(img) img_patches, one_patches = tf.image.extract_patches( images=[img, one_tensor], @@ -1152,33 +1152,33 @@ class Eynollah: y = tf.range(img.shape[0]) x, y = tf.meshgrid(x, y) indices = tf.stack([y, x], axis=-1) - + indices_patches = tf.image.extract_patches( images=tf.expand_dims(indices, axis=0), sizes=[1, img_height_model, img_width_model, 1], strides=[1, stride_y, stride_x, 1], rates=[1, 1, 1, 1], padding='SAME') - indices_patches = tf.squeeze(indices_patches) + indices_patches = tf.squeeze(indices_patches) indices_patches = tf.reshape(indices_patches, shape=(img_patches.shape[0] * img_patches.shape[1], img_height_model, img_width_model, 2)) - margin_y = int( 0.5 * (img_height_model - stride_y) ) - margin_x = int( 0.5 * (img_width_model - stride_x) ) - + margin_y = int(0.5 * (img_height_model - stride_y)) + margin_x = int(0.5 * (img_width_model - stride_x)) + mask_margin = np.zeros((img_height_model, img_width_model)) mask_margin[margin_y:img_height_model - margin_y, - margin_x:img_width_model - margin_x] = 1 - + margin_x:img_width_model - margin_x] = 1 + indices_patches_array = indices_patches.numpy() for i in range(indices_patches_array.shape[0]): - indices_patches_array[i,:,:,0] = indices_patches_array[i,:,:,0]*mask_margin - indices_patches_array[i,:,:,1] = indices_patches_array[i,:,:,1]*mask_margin - + indices_patches_array[i, :, :, 0] = indices_patches_array[i, :, :, 0] * mask_margin + indices_patches_array[i, :, :, 1] = indices_patches_array[i, :, :, 1] * mask_margin + reconstructed = tf.scatter_nd( indices=indices_patches_array, updates=pred_patches, shape=(img.shape[0], img.shape[1], pred_patches.shape[-1])).numpy() - + prediction_true = np.argmax(reconstructed, axis=2).astype(np.uint8) gc.collect() return np.repeat(prediction_true[:, :, np.newaxis], 3, axis=2) @@ -1201,18 +1201,18 @@ class Eynollah: label_p_pred = model.predict(img[np.newaxis], verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] - + if thresholding_for_artificial_class_in_light_version: #seg_text = label_p_pred[0,:,:,1] #seg_text[seg_text<0.2] =0 #seg_text[seg_text>0] =1 #seg[seg_text==1]=1 - - seg_art = label_p_pred[0,:,:,4] - seg_art[seg_art<0.2] =0 - seg_art[seg_art>0] =1 - seg[seg_art==1]=4 - + + seg_art = label_p_pred[0, :, :, 4] + seg_art[seg_art < 0.2] = 0 + seg_art[seg_art > 0] = 1 + seg[seg_art == 1] = 4 + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8) return prediction_true @@ -1278,30 +1278,30 @@ class Eynollah: batch_indexer += 1 if (batch_indexer == n_batch_inference or - # last batch - i == nxf - 1 and j == nyf - 1): + # last batch + i == nxf - 1 and j == nyf - 1): self.logger.debug("predicting patches on %s", str(img_patch.shape)) - label_p_pred = model.predict(img_patch,verbose=0) + label_p_pred = model.predict(img_patch, verbose=0) seg = np.argmax(label_p_pred, axis=3) if thresholding_for_some_classes_in_light_version: - seg_art = label_p_pred[:,:,:,4] - seg_art[seg_art<0.2] =0 - seg_art[seg_art>0] =1 + seg_art = label_p_pred[:, :, :, 4] + seg_art[seg_art < 0.2] = 0 + seg_art[seg_art > 0] = 1 - seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 + seg_line = label_p_pred[:, :, :, 3] + seg_line[seg_line > 0.1] = 1 + seg_line[seg_line < 1] = 0 - seg[seg_art==1]=4 - seg[(seg_line==1) & (seg==0)]=3 + seg[seg_art == 1] = 4 + seg[(seg_line == 1) & (seg == 0)] = 3 if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] + seg_art = label_p_pred[:, :, :, 2] - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 + seg_art[seg_art < 0.2] = 0 + seg_art[seg_art > 0] = 1 - seg[seg_art==1]=2 + seg[seg_art == 1] = 2 indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): @@ -1315,58 +1315,58 @@ class Eynollah: if i_batch == 0 and j_batch == 0: prediction_true[index_y_d_in + 0:index_y_u_in - margin, - index_x_d_in + 0:index_x_u_in - margin] = \ - seg_in[0:-margin or None, - 0:-margin or None, - np.newaxis] + index_x_d_in + 0:index_x_u_in - margin] = \ + seg_in[0:-margin or None, + 0:-margin or None, + np.newaxis] elif i_batch == nxf - 1 and j_batch == nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - 0, - index_x_d_in + margin:index_x_u_in - 0] = \ - seg_in[margin:, - margin:, - np.newaxis] + index_x_d_in + margin:index_x_u_in - 0] = \ + seg_in[margin:, + margin:, + np.newaxis] elif i_batch == 0 and j_batch == nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - 0, - index_x_d_in + 0:index_x_u_in - margin] = \ - seg_in[margin:, - 0:-margin or None, - np.newaxis] + index_x_d_in + 0:index_x_u_in - margin] = \ + seg_in[margin:, + 0:-margin or None, + np.newaxis] elif i_batch == nxf - 1 and j_batch == 0: prediction_true[index_y_d_in + 0:index_y_u_in - margin, - index_x_d_in + margin:index_x_u_in - 0] = \ - seg_in[0:-margin or None, - margin:, - np.newaxis] + index_x_d_in + margin:index_x_u_in - 0] = \ + seg_in[0:-margin or None, + margin:, + np.newaxis] elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - margin, - index_x_d_in + 0:index_x_u_in - margin] = \ - seg_in[margin:-margin or None, - 0:-margin or None, - np.newaxis] + index_x_d_in + 0:index_x_u_in - margin] = \ + seg_in[margin:-margin or None, + 0:-margin or None, + np.newaxis] elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - margin, - index_x_d_in + margin:index_x_u_in - 0] = \ - seg_in[margin:-margin or None, - margin:, - np.newaxis] + index_x_d_in + margin:index_x_u_in - 0] = \ + seg_in[margin:-margin or None, + margin:, + np.newaxis] elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: prediction_true[index_y_d_in + 0:index_y_u_in - margin, - index_x_d_in + margin:index_x_u_in - margin] = \ - seg_in[0:-margin or None, - margin:-margin or None, - np.newaxis] + index_x_d_in + margin:index_x_u_in - margin] = \ + seg_in[0:-margin or None, + margin:-margin or None, + np.newaxis] elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - 0, - index_x_d_in + margin:index_x_u_in - margin] = \ - seg_in[margin:, - margin:-margin or None, - np.newaxis] + index_x_d_in + margin:index_x_u_in - margin] = \ + seg_in[margin:, + margin:-margin or None, + np.newaxis] else: prediction_true[index_y_d_in + margin:index_y_u_in - margin, - index_x_d_in + margin:index_x_u_in - margin] = \ - seg_in[margin:-margin or None, - margin:-margin or None, - np.newaxis] + index_x_d_in + margin:index_x_u_in - margin] = \ + seg_in[margin:-margin or None, + margin:-margin or None, + np.newaxis] indexer_inside_batch += 1 list_i_s = [] @@ -1396,8 +1396,8 @@ class Eynollah: _, thresh = cv2.threshold(imgray, 0, 255, 0) thresh = cv2.dilate(thresh, KERNEL, iterations=3) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - - if len(contours)>0: + + if len(contours) > 0: cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) cnt = contours[np.argmax(cnt_size)] @@ -1430,7 +1430,7 @@ class Eynollah: [page_coord[2], page_coord[1]]])) return cropped_page, page_coord, cont_page - def early_page_for_num_of_column_classification(self,img_bin): + def early_page_for_num_of_column_classification(self, img_bin): if not self.ignore_page_extraction: self.logger.debug("enter early_page_for_num_of_column_classification") if self.input_binary: @@ -1446,7 +1446,7 @@ class Eynollah: _, thresh = cv2.threshold(imgray, 0, 255, 0) thresh = cv2.dilate(thresh, KERNEL, iterations=3) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - if len(contours)>0: + if len(contours) > 0: cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) cnt = contours[np.argmax(cnt_size)] @@ -1454,7 +1454,7 @@ class Eynollah: else: box = [0, 0, img.shape[1], img.shape[0]] cropped_page, page_coord = crop_image_inside_box(box, img) - + self.logger.debug("exit early_page_for_num_of_column_classification") else: img = self.imread() @@ -1493,7 +1493,8 @@ class Eynollah: else: img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500).astype(np.uint8) - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1, n_batch_inference=3) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1, + n_batch_inference=3) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions @@ -1555,50 +1556,54 @@ class Eynollah: prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 - - def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): - - polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001) + + def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, + slope_deskew): + + polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot, 1, 0.00001) M_main_tot = [cv2.moments(polygons_of_textlines[j]) for j in range(len(polygons_of_textlines))] cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - + args_textlines = np.array(range(len(polygons_of_textlines))) all_found_textline_polygons = [] slopes = [] - all_box_coord =[] - + all_box_coord = [] + for index, con_region_ind in enumerate(contours_par): results = [cv2.pointPolygonTest(con_region_ind, (cx_main_tot[ind], cy_main_tot[ind]), False) - for ind in args_textlines ] + for ind in args_textlines] results = np.array(results) - indexes_in = args_textlines[results==1] + indexes_in = args_textlines[results == 1] textlines_ins = [polygons_of_textlines[ind] for ind in indexes_in] - + all_found_textline_polygons.append(textlines_ins) slopes.append(slope_deskew) - - _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated) + + _, crop_coor = crop_image_inside_box(boxes[index], image_page_rotated) all_box_coord.append(crop_coor) - - return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes - - def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + + return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array( + range(len(contours_par))), slopes + + def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, + slope_deskew): if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_light") results = self.executor.map(partial(do_work_of_slopes_new_light, textline_mask_tot_ea=textline_mask_tot, image_page_rotated=image_page_rotated, - slope_deskew=slope_deskew,textline_light=self.textline_light, - logger=self.logger,), + slope_deskew=slope_deskew, textline_light=self.textline_light, + logger=self.logger, ), boxes, contours, contours_par, range(len(contours_par))) #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new_light") return tuple(zip(*results)) - def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, + slope_deskew): if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new") @@ -1609,13 +1614,14 @@ class Eynollah: MAX_SLOPE=MAX_SLOPE, KERNEL=KERNEL, logger=self.logger, - plotter=self.plotter,), + plotter=self.plotter, ), boxes, contours, contours_par, range(len(contours_par))) #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new") return tuple(zip(*results)) - def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew): + def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, + mask_texts_only, num_col, scale_par, slope_deskew): if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_curved") @@ -1629,7 +1635,7 @@ class Eynollah: MAX_SLOPE=MAX_SLOPE, KERNEL=KERNEL, logger=self.logger, - plotter=self.plotter,), + plotter=self.plotter, ), boxes, contours, contours_par, range(len(contours_par))) #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new_curved") @@ -1645,41 +1651,40 @@ class Eynollah: img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - + prediction_textline = self.do_prediction( use_patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=self.textline_light) #if not self.textline_light: - #if num_col_classifier==1: - #prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline) - #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 + #if num_col_classifier==1: + #prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline) + #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 prediction_textline = resize_image(prediction_textline, img_h, img_w) - textline_mask_tot_ea_art = (prediction_textline[:,:]==2)*1 - + textline_mask_tot_ea_art = (prediction_textline[:, :] == 2) * 1 + old_art = np.copy(textline_mask_tot_ea_art) if not self.textline_light: textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') #textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) - prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2 - - textline_mask_tot_ea_lines = (prediction_textline[:,:]==1)*1 + prediction_textline[:, :][textline_mask_tot_ea_art[:, :] == 1] = 2 + + textline_mask_tot_ea_lines = (prediction_textline[:, :] == 1) * 1 textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8') if not self.textline_light: textline_mask_tot_ea_lines = cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1) - - prediction_textline[:,:][textline_mask_tot_ea_lines[:,:]==1]=1 + + prediction_textline[:, :][textline_mask_tot_ea_lines[:, :] == 1] = 1 if not self.textline_light: - prediction_textline[:,:][old_art[:,:]==1]=2 - + prediction_textline[:, :][old_art[:, :] == 1] = 2 + prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) - - self.logger.debug('exit textline_contours') - return ((prediction_textline[:, :, 0]==1).astype(np.uint8), - (prediction_textline_longshot_true_size[:, :, 0]==1).astype(np.uint8)) + self.logger.debug('exit textline_contours') + return ((prediction_textline[:, :, 0] == 1).astype(np.uint8), + (prediction_textline_longshot_true_size[:, :, 0] == 1).astype(np.uint8)) def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process): self.logger.debug('enter do_work_of_slopes') @@ -1688,17 +1693,20 @@ class Eynollah: boxes_sub_new = [] poly_sub = [] for mv in range(len(boxes_per_process)): - crop_img, _ = crop_image_inside_box(boxes_per_process[mv], np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) + crop_img, _ = crop_image_inside_box(boxes_per_process[mv], + np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) crop_img = crop_img[:, :, 0] crop_img = cv2.erode(crop_img, KERNEL, iterations=2) try: textline_con, hierarchy = return_contours_of_image(crop_img) - textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierarchy, max_area=1, min_area=0.0008) + textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierarchy, max_area=1, + min_area=0.0008) y_diff_mean = find_contours_mean_y_diff(textline_con_fil) sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) crop_img[crop_img > 0] = 1 slope_corresponding_textregion = return_deskew_slop(crop_img, sigma_des, - map=self.executor.map, logger=self.logger, plotter=self.plotter) + map=self.executor.map, logger=self.logger, + plotter=self.plotter) except Exception as why: self.logger.error(why) slope_corresponding_textregion = MAX_SLOPE @@ -1718,7 +1726,7 @@ class Eynollah: box_sub.put(boxes_sub_new) self.logger.debug('exit do_work_of_slopes') - def get_regions_light_v_extract_only_images(self,img,is_image_enhanced, num_col_classifier): + def get_regions_light_v_extract_only_images(self, img, is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_extract_images_only") erosion_hurts = False img_org = np.copy(img) @@ -1738,38 +1746,39 @@ class Eynollah: elif num_col_classifier == 6: img_w_new = 2500 img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new) - img_resized = resize_image(img,img_h_new, img_w_new ) + img_resized = resize_image(img, img_h_new, img_w_new) if not self.dir_in: - self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens_light_only_images_extraction) + self.model_region, _ = self.start_new_session_and_model( + self.model_region_dir_p_ens_light_only_images_extraction) prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region) - prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) + prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h) image_page, page_coord, cont_page = self.extract_page() - prediction_regions_org = prediction_regions_org[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - prediction_regions_org=prediction_regions_org[:,:,0] + prediction_regions_org = prediction_regions_org[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]] + prediction_regions_org = prediction_regions_org[:, :, 0] - mask_lines_only = (prediction_regions_org[:,:] ==3)*1 - mask_texts_only = (prediction_regions_org[:,:] ==1)*1 - mask_images_only=(prediction_regions_org[:,:] ==2)*1 + mask_lines_only = (prediction_regions_org[:, :] == 3) * 1 + mask_texts_only = (prediction_regions_org[:, :] == 1) * 1 + mask_images_only = (prediction_regions_org[:, :] == 2) * 1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) polygons_lines_xml = textline_con_fil = filter_contours_area_of_image( mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) - polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) + polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3, 3, 3)) - text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1,1,1)) + text_regions_p_true[:, :][mask_images_only[:, :] == 1] = 2 + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1, 1, 1)) - text_regions_p_true[text_regions_p_true.shape[0]-15:text_regions_p_true.shape[0], :] = 0 - text_regions_p_true[:, text_regions_p_true.shape[1]-15:text_regions_p_true.shape[1]] = 0 + text_regions_p_true[text_regions_p_true.shape[0] - 15:text_regions_p_true.shape[0], :] = 0 + text_regions_p_true[:, text_regions_p_true.shape[1] - 15:text_regions_p_true.shape[1]] = 0 ##polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.0001) polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.001) @@ -1823,7 +1832,7 @@ class Eynollah: self.logger.debug("exit get_regions_extract_images_only") return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page - def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False): + def get_regions_light_v(self, img, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False): self.logger.debug("enter get_regions_light_v") t_in = time.time() erosion_hurts = False @@ -1833,11 +1842,11 @@ class Eynollah: #model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) #print(num_col_classifier,'num_col_classifier') - + if num_col_classifier == 1: img_w_new = 1000 elif num_col_classifier == 2: - img_w_new = 1500#1500 + img_w_new = 1500 #1500 elif num_col_classifier == 3: img_w_new = 2000 elif num_col_classifier == 4: @@ -1847,34 +1856,34 @@ class Eynollah: else: img_w_new = 4000 img_h_new = img_w_new * img_org.shape[0] // img_org.shape[1] - img_resized = resize_image(img,img_h_new, img_w_new ) - + img_resized = resize_image(img, img_h_new, img_w_new) + t_bin = time.time() #if (not self.input_binary) or self.full_layout: #if self.input_binary: - #img_bin = np.copy(img_resized) + #img_bin = np.copy(img_resized) ###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30): - ###if not self.dir_in: - ###self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) - ###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - - ####print("inside bin ", time.time()-t_bin) - ###prediction_bin=prediction_bin[:,:,0] - ###prediction_bin = (prediction_bin[:,:]==0)*1 - ###prediction_bin = prediction_bin*255 - - ###prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - - ###prediction_bin = prediction_bin.astype(np.uint16) - ####img= np.copy(prediction_bin) - ###img_bin = np.copy(prediction_bin) + ###if not self.dir_in: + ###self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) + ###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + + ####print("inside bin ", time.time()-t_bin) + ###prediction_bin=prediction_bin[:,:,0] + ###prediction_bin = (prediction_bin[:,:]==0)*1 + ###prediction_bin = prediction_bin*255 + + ###prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + + ###prediction_bin = prediction_bin.astype(np.uint16) + ####img= np.copy(prediction_bin) + ###img_bin = np.copy(prediction_bin) ###else: - ###img_bin = np.copy(img_resized) + ###img_bin = np.copy(img_resized) if self.ocr and not self.input_binary: if not self.dir_in: self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - prediction_bin = 255 * (prediction_bin[:,:,0] == 0) + prediction_bin = 255 * (prediction_bin[:, :, 0] == 0) prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) prediction_bin = prediction_bin.astype(np.uint16) #img= np.copy(prediction_bin) @@ -1882,15 +1891,16 @@ class Eynollah: else: img_bin = np.copy(img_resized) #print("inside 1 ", time.time()-t_in) - + ###textline_mask_tot_ea = self.run_textline(img_bin) - self.logger.debug("detecting textlines on %s with %d colors", str(img_resized.shape), len(np.unique(img_resized))) + self.logger.debug("detecting textlines on %s with %d colors", str(img_resized.shape), + len(np.unique(img_resized))) textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) - + textline_mask_tot_ea = resize_image(textline_mask_tot_ea, img_height_h, img_width_h) + #print(self.image_org.shape) #cv2.imwrite('out_13.png', self.image_page_org_size) - + #plt.imshwo(self.image_page_org_size) #plt.show() if not skip_layout_and_reading_order: @@ -1901,7 +1911,7 @@ class Eynollah: if num_col_classifier == 1 or num_col_classifier == 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: + if self.image_org.shape[0] / self.image_org.shape[1] > 2.5: self.logger.debug("resized to %dx%d for %d cols", img_resized.shape[1], img_resized.shape[0], num_col_classifier) prediction_regions_org = self.do_prediction_new_concept( @@ -1917,8 +1927,8 @@ class Eynollah: prediction_regions_org[ys, xs] = prediction_regions_page else: - new_h = (900+ (num_col_classifier-3)*100) - img_resized = resize_image(img_bin, int(new_h * img_bin.shape[0] /img_bin.shape[1]), new_h) + new_h = (900 + (num_col_classifier - 3) * 100) + img_resized = resize_image(img_bin, int(new_h * img_bin.shape[0] / img_bin.shape[1]), new_h) self.logger.debug("resized to %dx%d (new_h=%d) for %d cols", img_resized.shape[1], img_resized.shape[0], new_h, num_col_classifier) prediction_regions_org = self.do_prediction_new_concept( @@ -1928,157 +1938,158 @@ class Eynollah: #print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() - - prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) - img_bin = resize_image(img_bin,img_height_h, img_width_h ) - prediction_regions_org=prediction_regions_org[:,:,0] - - mask_lines_only = (prediction_regions_org[:,:] ==3)*1 - mask_texts_only = (prediction_regions_org[:,:] ==1)*1 + + prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h) + img_bin = resize_image(img_bin, img_height_h, img_width_h) + prediction_regions_org = prediction_regions_org[:, :, 0] + + mask_lines_only = (prediction_regions_org[:, :] == 3) * 1 + mask_texts_only = (prediction_regions_org[:, :] == 1) * 1 mask_texts_only = mask_texts_only.astype('uint8') - + ##if num_col_classifier == 1 or num_col_classifier == 2: - ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) - ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) - - mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) - mask_images_only=(prediction_regions_org[:,:] ==2)*1 - + ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) + + mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2, 2), np.uint8), iterations=1) + mask_images_only = (prediction_regions_org[:, :] == 2) * 1 + polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) test_khat = np.zeros(prediction_regions_org.shape) - test_khat = cv2.fillPoly(test_khat, pts=polygons_lines_xml, color=(1,1,1)) + test_khat = cv2.fillPoly(test_khat, pts=polygons_lines_xml, color=(1, 1, 1)) #plt.imshow(test_khat[:,:]) #plt.show() #for jv in range(1): - #print(jv, hir_lines_xml[0][232][3]) - #test_khat = np.zeros(prediction_regions_org.shape) - #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) - #plt.imshow(test_khat[:,:]) - #plt.show() + #print(jv, hir_lines_xml[0][232][3]) + #test_khat = np.zeros(prediction_regions_org.shape) + #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) + #plt.imshow(test_khat[:,:]) + #plt.show() polygons_lines_xml = filter_contours_area_of_image( mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) - + test_khat = np.zeros(prediction_regions_org.shape) - test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) - + test_khat = cv2.fillPoly(test_khat, pts=polygons_lines_xml, color=(1, 1, 1)) + #plt.imshow(test_khat[:,:]) #plt.show() #sys.exit() - - polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) + + polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) ##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts) - polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) - + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) + text_regions_p_true = np.zeros(prediction_regions_org.shape) - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3,3,3)) - - text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3, 3, 3)) + + text_regions_p_true[:, :][mask_images_only[:, :] == 1] = 2 + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1, 1, 1)) + #plt.imshow(textline_mask_tot_ea) #plt.show() - - textline_mask_tot_ea[(text_regions_p_true==0) | (text_regions_p_true==4) ] = 0 - + + textline_mask_tot_ea[(text_regions_p_true == 0) | (text_regions_p_true == 4)] = 0 + #plt.imshow(textline_mask_tot_ea) #plt.show() #print("inside 4 ", time.time()-t_in) self.logger.debug("exit get_regions_light_v") return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin else: - img_bin = resize_image(img_bin,img_height_h, img_width_h ) + img_bin = resize_image(img_bin, img_height_h, img_width_h) self.logger.debug("exit get_regions_light_v") return None, erosion_hurts, None, textline_mask_tot_ea, img_bin - def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): + def get_regions_from_xy_2models(self, img, is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_from_xy_2models") erosion_hurts = False img_org = np.copy(img) img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] - + if not self.dir_in: self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) - ratio_y=1.3 - ratio_x=1 + ratio_y = 1.3 + ratio_x = 1 - img = resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) + img = resize_image(img_org, int(img_org.shape[0] * ratio_y), int(img_org.shape[1] * ratio_x)) prediction_regions_org_y = self.do_prediction(True, img, self.model_region) - prediction_regions_org_y = resize_image(prediction_regions_org_y, img_height_h, img_width_h ) + prediction_regions_org_y = resize_image(prediction_regions_org_y, img_height_h, img_width_h) #plt.imshow(prediction_regions_org_y[:,:,0]) #plt.show() - prediction_regions_org_y = prediction_regions_org_y[:,:,0] - mask_zeros_y = (prediction_regions_org_y[:,:]==0)*1 - + prediction_regions_org_y = prediction_regions_org_y[:, :, 0] + mask_zeros_y = (prediction_regions_org_y[:, :] == 0) * 1 + ##img_only_regions_with_sep = ( (prediction_regions_org_y[:,:] != 3) & (prediction_regions_org_y[:,:] != 0) )*1 img_only_regions_with_sep = (prediction_regions_org_y == 1).astype(np.uint8) try: - img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=20) + img_only_regions = cv2.erode(img_only_regions_with_sep[:, :], KERNEL, iterations=20) _, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) - img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1))) - + img = resize_image(img_org, int(img_org.shape[0]), + int(img_org.shape[1] * (1.2 if is_image_enhanced else 1))) + prediction_regions_org = self.do_prediction(True, img, self.model_region) - prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) + prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h) - prediction_regions_org=prediction_regions_org[:,:,0] - prediction_regions_org[(prediction_regions_org[:,:]==1) & (mask_zeros_y[:,:]==1)]=0 + prediction_regions_org = prediction_regions_org[:, :, 0] + prediction_regions_org[(prediction_regions_org[:, :] == 1) & (mask_zeros_y[:, :] == 1)] = 0 if not self.dir_in: self.model_region_p2, _ = self.start_new_session_and_model(self.model_region_dir_p2) img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1])) - + prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, marginal_of_patch_percent=0.2) - prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) + prediction_regions_org2 = resize_image(prediction_regions_org2, img_height_h, img_width_h) - mask_zeros2 = (prediction_regions_org2[:,:,0] == 0) - mask_lines2 = (prediction_regions_org2[:,:,0] == 3) - text_sume_early = (prediction_regions_org[:,:] == 1).sum() + mask_zeros2 = (prediction_regions_org2[:, :, 0] == 0) + mask_lines2 = (prediction_regions_org2[:, :, 0] == 3) + text_sume_early = (prediction_regions_org[:, :] == 1).sum() prediction_regions_org_copy = np.copy(prediction_regions_org) - prediction_regions_org_copy[(prediction_regions_org_copy[:,:]==1) & (mask_zeros2[:,:]==1)] = 0 - text_sume_second = ((prediction_regions_org_copy[:,:]==1)*1).sum() + prediction_regions_org_copy[(prediction_regions_org_copy[:, :] == 1) & (mask_zeros2[:, :] == 1)] = 0 + text_sume_second = ((prediction_regions_org_copy[:, :] == 1) * 1).sum() rate_two_models = 100. * text_sume_second / text_sume_early self.logger.info("ratio_of_two_models: %s", rate_two_models) - if not(is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD): + if not (is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD): prediction_regions_org = np.copy(prediction_regions_org_copy) - prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3 - mask_lines_only=(prediction_regions_org[:,:]==3)*1 - prediction_regions_org = cv2.erode(prediction_regions_org[:,:], KERNEL, iterations=2) - prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2) + prediction_regions_org[(mask_lines2[:, :] == 1) & (prediction_regions_org[:, :] == 0)] = 3 + mask_lines_only = (prediction_regions_org[:, :] == 3) * 1 + prediction_regions_org = cv2.erode(prediction_regions_org[:, :], KERNEL, iterations=2) + prediction_regions_org = cv2.dilate(prediction_regions_org[:, :], KERNEL, iterations=2) - if rate_two_models<=40: + if rate_two_models <= 40: if self.input_binary: prediction_bin = np.copy(img_org) else: if not self.dir_in: self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) - prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) - prediction_bin = 255 * (prediction_bin[:,:,0]==0) + prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h) + prediction_bin = 255 * (prediction_bin[:, :, 0] == 0) prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - + if not self.dir_in: self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) - ratio_y=1 - ratio_x=1 + ratio_y = 1 + ratio_x = 1 + + img = resize_image(prediction_bin, int(img_org.shape[0] * ratio_y), int(img_org.shape[1] * ratio_x)) - img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - prediction_regions_org = self.do_prediction(True, img, self.model_region) - prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) - prediction_regions_org=prediction_regions_org[:,:,0] - - mask_lines_only=(prediction_regions_org[:,:]==3)*1 - - mask_texts_only=(prediction_regions_org[:,:]==1)*1 - mask_images_only=(prediction_regions_org[:,:]==2)*1 - + prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h) + prediction_regions_org = prediction_regions_org[:, :, 0] + + mask_lines_only = (prediction_regions_org[:, :] == 3) * 1 + + mask_texts_only = (prediction_regions_org[:, :] == 1) * 1 + mask_images_only = (prediction_regions_org[:, :] == 2) * 1 + polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) polygons_lines_xml = filter_contours_area_of_image( mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) @@ -2087,67 +2098,65 @@ class Eynollah: polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) - text_regions_p_true = cv2.fillPoly(text_regions_p_true,pts = polygons_of_only_lines, color=(3, 3, 3)) - text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3, 3, 3)) + text_regions_p_true[:, :][mask_images_only[:, :] == 1] = 2 - text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1)) + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1, 1, 1)) self.logger.debug("exit get_regions_from_xy_2models") return text_regions_p_true, erosion_hurts, polygons_lines_xml except: if self.input_binary: prediction_bin = np.copy(img_org) - + if not self.dir_in: self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) - prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) - prediction_bin = 255 * (prediction_bin[:,:,0]==0) + prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h) + prediction_bin = 255 * (prediction_bin[:, :, 0] == 0) prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - + if not self.dir_in: self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens) - + else: prediction_bin = np.copy(img_org) - ratio_y=1 - ratio_x=1 - + ratio_y = 1 + ratio_x = 1 - img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) + img = resize_image(prediction_bin, int(img_org.shape[0] * ratio_y), int(img_org.shape[1] * ratio_x)) prediction_regions_org = self.do_prediction(True, img, self.model_region) - prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) - prediction_regions_org=prediction_regions_org[:,:,0] - + prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h) + prediction_regions_org = prediction_regions_org[:, :, 0] + #mask_lines_only=(prediction_regions_org[:,:]==3)*1 #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1)) - + #prediction_regions_org = self.do_prediction(True, img, self.model_region) - + #prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) - + #prediction_regions_org = prediction_regions_org[:,:,0] - + #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0 - - - mask_lines_only = (prediction_regions_org == 3)*1 - mask_texts_only = (prediction_regions_org == 1)*1 - mask_images_only= (prediction_regions_org == 2)*1 - + + mask_lines_only = (prediction_regions_org == 3) * 1 + mask_texts_only = (prediction_regions_org == 1) * 1 + mask_images_only = (prediction_regions_org == 2) * 1 + polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) polygons_lines_xml = filter_contours_area_of_image( mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) - - polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) - + + polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) + text_regions_p_true = np.zeros(prediction_regions_org.shape) - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) - - text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3, 3, 3)) + + text_regions_p_true[:, :][mask_images_only[:, :] == 1] = 2 + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1, 1, 1)) + erosion_hurts = True self.logger.debug("exit get_regions_from_xy_2models") return text_regions_p_true, erosion_hurts, polygons_lines_xml @@ -2156,7 +2165,7 @@ class Eynollah: self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): self.logger.debug("enter do_order_of_regions_full_layout") - boxes = np.array(boxes, dtype=int) # to be on the safe side + boxes = np.array(boxes, dtype=int) # to be on the safe side cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours( contours_only_text_parent) cx_text_only_h, cy_text_only_h, x_min_text_only_h, _, _, _, y_cor_x_min_main_h = find_new_features_of_contours( @@ -2253,7 +2262,7 @@ class Eynollah: order_text_new = [] for iii in range(len(order_of_texts_tot)): order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) - + except Exception as why: self.logger.error(why) arg_text_con = [] @@ -2266,7 +2275,7 @@ class Eynollah: arg_text_con.append(jj) check_if_textregion_located_in_a_box = True break - + if not check_if_textregion_located_in_a_box: dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) @@ -2351,7 +2360,7 @@ class Eynollah: order_text_new = [] for iii in range(len(order_of_texts_tot)): order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) - + self.logger.debug("exit do_order_of_regions_full_layout") return order_text_new, id_of_texts_tot @@ -2359,7 +2368,7 @@ class Eynollah: self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): self.logger.debug("enter do_order_of_regions_no_full_layout") - boxes = np.array(boxes, dtype=int) # to be on the safe side + boxes = np.array(boxes, dtype=int) # to be on the safe side cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours( contours_only_text_parent) @@ -2421,7 +2430,7 @@ class Eynollah: order_text_new = [] for iii in range(len(order_of_texts_tot)): order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) - + except Exception as why: self.logger.error(why) arg_text_con = [] @@ -2476,122 +2485,124 @@ class Eynollah: ref_point += len(id_of_texts) order_of_texts_tot = [] - + for tj1 in range(len(contours_only_text_parent)): order_of_texts_tot.append(int(order_by_con_main[tj1])) order_text_new = [] for iii in range(len(order_of_texts_tot)): order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) - + self.logger.debug("exit do_order_of_regions_no_full_layout") return order_text_new, id_of_texts_tot def check_iou_of_bounding_box_and_contour_for_tables( self, layout, table_prediction_early, pixel_table, num_col_classifier): - layout_org = np.copy(layout) - layout_org[:,:,0][layout_org[:,:,0]==pixel_table] = 0 - layout = (layout[:,:,0]==pixel_table)*1 + layout_org = np.copy(layout) + layout_org[:, :, 0][layout_org[:, :, 0] == pixel_table] = 0 + layout = (layout[:, :, 0] == pixel_table) * 1 - layout =np.repeat(layout[:, :, np.newaxis], 3, axis=2) + layout = np.repeat(layout[:, :, np.newaxis], 3, axis=2) layout = layout.astype(np.uint8) - imgray = cv2.cvtColor(layout, cv2.COLOR_BGR2GRAY ) + imgray = cv2.cvtColor(layout, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) - + contours_new = [] for i in range(len(contours)): x, y, w, h = cv2.boundingRect(contours[i]) - iou = cnt_size[i] /float(w*h) *100 - if iou<80: + iou = cnt_size[i] / float(w * h) * 100 + if iou < 80: layout_contour = np.zeros((layout_org.shape[0], layout_org.shape[1])) - layout_contour= cv2.fillPoly(layout_contour,pts=[contours[i]] ,color=(1,1,1)) + layout_contour = cv2.fillPoly(layout_contour, pts=[contours[i]], color=(1, 1, 1)) layout_contour_sum = layout_contour.sum(axis=0) layout_contour_sum_diff = np.diff(layout_contour_sum) - layout_contour_sum_diff= np.abs(layout_contour_sum_diff) - layout_contour_sum_diff_smoothed= gaussian_filter1d(layout_contour_sum_diff, 10) + layout_contour_sum_diff = np.abs(layout_contour_sum_diff) + layout_contour_sum_diff_smoothed = gaussian_filter1d(layout_contour_sum_diff, 10) peaks, _ = find_peaks(layout_contour_sum_diff_smoothed, height=0) - peaks= peaks[layout_contour_sum_diff_smoothed[peaks]>4] - + peaks = peaks[layout_contour_sum_diff_smoothed[peaks] > 4] + for j in range(len(peaks)): - layout_contour[:,peaks[j]-3+1:peaks[j]+1+3] = 0 - - layout_contour=cv2.erode(layout_contour[:,:], KERNEL, iterations=5) - layout_contour=cv2.dilate(layout_contour[:,:], KERNEL, iterations=5) - - layout_contour =np.repeat(layout_contour[:, :, np.newaxis], 3, axis=2) + layout_contour[:, peaks[j] - 3 + 1:peaks[j] + 1 + 3] = 0 + + layout_contour = cv2.erode(layout_contour[:, :], KERNEL, iterations=5) + layout_contour = cv2.dilate(layout_contour[:, :], KERNEL, iterations=5) + + layout_contour = np.repeat(layout_contour[:, :, np.newaxis], 3, axis=2) layout_contour = layout_contour.astype(np.uint8) - - imgray = cv2.cvtColor(layout_contour, cv2.COLOR_BGR2GRAY ) + + imgray = cv2.cvtColor(layout_contour, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) contours_sep, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - for ji in range(len(contours_sep) ): + for ji in range(len(contours_sep)): contours_new.append(contours_sep[ji]) - if num_col_classifier>=2: - only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1])) - only_recent_contour_image= cv2.fillPoly(only_recent_contour_image, pts=[contours_sep[ji]], color=(1,1,1)) + if num_col_classifier >= 2: + only_recent_contour_image = np.zeros((layout.shape[0], layout.shape[1])) + only_recent_contour_image = cv2.fillPoly(only_recent_contour_image, pts=[contours_sep[ji]], + color=(1, 1, 1)) table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early iou_in = 100. * table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum() #print(iou_in,'iou_in_in1') - - if iou_in>30: - layout_org= cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,)) + + if iou_in > 30: + layout_org = cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,)) else: pass else: - layout_org= cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,)) + layout_org = cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,)) else: contours_new.append(contours[i]) - if num_col_classifier>=2: - only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1])) - only_recent_contour_image= cv2.fillPoly(only_recent_contour_image,pts=[contours[i]] ,color=(1,1,1)) - + if num_col_classifier >= 2: + only_recent_contour_image = np.zeros((layout.shape[0], layout.shape[1])) + only_recent_contour_image = cv2.fillPoly(only_recent_contour_image, pts=[contours[i]], + color=(1, 1, 1)) + table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early iou_in = 100. * table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum() #print(iou_in,'iou_in') - if iou_in>30: - layout_org= cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,)) + if iou_in > 30: + layout_org = cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,)) else: pass else: - layout_org= cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,)) - + layout_org = cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,)) + return layout_org, contours_new - def delete_separator_around(self, spliter_y,peaks_neg,image_by_region, pixel_line, pixel_table): + def delete_separator_around(self, spliter_y, peaks_neg, image_by_region, pixel_line, pixel_table): # format of subboxes: box=[x1, x2 , y1, y2] pix_del = 100 - if len(image_by_region.shape)==3: - for i in range(len(spliter_y)-1): - for j in range(1,len(peaks_neg[i])-1): + if len(image_by_region.shape) == 3: + for i in range(len(spliter_y) - 1): + for j in range(1, len(peaks_neg[i]) - 1): ys = slice(int(spliter_y[i]), - int(spliter_y[i+1])) + int(spliter_y[i + 1])) xs = slice(peaks_neg[i][j] - pix_del, peaks_neg[i][j] + pix_del) - image_by_region[ys,xs,0][image_by_region[ys,xs,0]==pixel_line] = 0 - image_by_region[ys,xs,0][image_by_region[ys,xs,1]==pixel_line] = 0 - image_by_region[ys,xs,0][image_by_region[ys,xs,2]==pixel_line] = 0 - - image_by_region[ys,xs,0][image_by_region[ys,xs,0]==pixel_table] = 0 - image_by_region[ys,xs,0][image_by_region[ys,xs,1]==pixel_table] = 0 - image_by_region[ys,xs,0][image_by_region[ys,xs,2]==pixel_table] = 0 + image_by_region[ys, xs, 0][image_by_region[ys, xs, 0] == pixel_line] = 0 + image_by_region[ys, xs, 0][image_by_region[ys, xs, 1] == pixel_line] = 0 + image_by_region[ys, xs, 0][image_by_region[ys, xs, 2] == pixel_line] = 0 + + image_by_region[ys, xs, 0][image_by_region[ys, xs, 0] == pixel_table] = 0 + image_by_region[ys, xs, 0][image_by_region[ys, xs, 1] == pixel_table] = 0 + image_by_region[ys, xs, 0][image_by_region[ys, xs, 2] == pixel_table] = 0 else: - for i in range(len(spliter_y)-1): - for j in range(1,len(peaks_neg[i])-1): + for i in range(len(spliter_y) - 1): + for j in range(1, len(peaks_neg[i]) - 1): ys = slice(int(spliter_y[i]), - int(spliter_y[i+1])) + int(spliter_y[i + 1])) xs = slice(peaks_neg[i][j] - pix_del, peaks_neg[i][j] + pix_del) - image_by_region[ys,xs][image_by_region[ys,xs]==pixel_line] = 0 - image_by_region[ys,xs][image_by_region[ys,xs]==pixel_table] = 0 + image_by_region[ys, xs][image_by_region[ys, xs] == pixel_line] = 0 + image_by_region[ys, xs][image_by_region[ys, xs] == pixel_table] = 0 return image_by_region def add_tables_heuristic_to_layout( @@ -2599,103 +2610,111 @@ class Eynollah: slope_mean_hor, spliter_y, peaks_neg_tot, image_revised, num_col_classifier, min_area, pixel_line): - pixel_table =10 + pixel_table = 10 image_revised_1 = self.delete_separator_around(spliter_y, peaks_neg_tot, image_revised, pixel_line, pixel_table) - + try: - image_revised_1[:,:30][image_revised_1[:,:30]==pixel_line] = 0 - image_revised_1[:,-30:][image_revised_1[:,-30:]==pixel_line] = 0 + image_revised_1[:, :30][image_revised_1[:, :30] == pixel_line] = 0 + image_revised_1[:, -30:][image_revised_1[:, -30:] == pixel_line] = 0 except: pass - boxes = np.array(boxes, dtype=int) # to be on the safe side - + boxes = np.array(boxes, dtype=int) # to be on the safe side + img_comm_e = np.zeros(image_revised_1.shape) img_comm = np.repeat(img_comm_e[:, :, np.newaxis], 3, axis=2) for indiv in np.unique(image_revised_1): - image_col=(image_revised_1==indiv)*255 - img_comm_in=np.repeat(image_col[:, :, np.newaxis], 3, axis=2) - img_comm_in=img_comm_in.astype(np.uint8) + image_col = (image_revised_1 == indiv) * 255 + img_comm_in = np.repeat(image_col[:, :, np.newaxis], 3, axis=2) + img_comm_in = img_comm_in.astype(np.uint8) imgray = cv2.cvtColor(img_comm_in, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours,hirarchy=cv2.findContours(thresh.copy(), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + contours, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - if indiv==pixel_table: - main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area = 1, min_area = 0.001) + if indiv == pixel_table: + main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area=1, + min_area=0.001) else: - main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area = 1, min_area = min_area) + main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area=1, + min_area=min_area) - img_comm = cv2.fillPoly(img_comm, pts = main_contours, color = (indiv, indiv, indiv)) + img_comm = cv2.fillPoly(img_comm, pts=main_contours, color=(indiv, indiv, indiv)) img_comm = img_comm.astype(np.uint8) - + if not self.isNaN(slope_mean_hor): - image_revised_last = np.zeros((image_regions_eraly_p.shape[0], image_regions_eraly_p.shape[1],3)) + image_revised_last = np.zeros((image_regions_eraly_p.shape[0], image_regions_eraly_p.shape[1], 3)) for i in range(len(boxes)): box_ys = slice(*boxes[i][2:4]) box_xs = slice(*boxes[i][0:2]) image_box = img_comm[box_ys, box_xs] try: - image_box_tabels_1=(image_box[:,:,0]==pixel_table)*1 - contours_tab,_=return_contours_of_image(image_box_tabels_1) - contours_tab=filter_contours_area_of_image_tables(image_box_tabels_1,contours_tab,_,1,0.003) - image_box_tabels_1=(image_box[:,:,0]==pixel_line)*1 + image_box_tabels_1 = (image_box[:, :, 0] == pixel_table) * 1 + contours_tab, _ = return_contours_of_image(image_box_tabels_1) + contours_tab = filter_contours_area_of_image_tables(image_box_tabels_1, contours_tab, _, 1, 0.003) + image_box_tabels_1 = (image_box[:, :, 0] == pixel_line) * 1 - image_box_tabels_and_m_text=( (image_box[:,:,0]==pixel_table) | (image_box[:,:,0]==1) )*1 - image_box_tabels_and_m_text=image_box_tabels_and_m_text.astype(np.uint8) + image_box_tabels_and_m_text = ((image_box[:, :, 0] == pixel_table) | (image_box[:, :, 0] == 1)) * 1 + image_box_tabels_and_m_text = image_box_tabels_and_m_text.astype(np.uint8) - image_box_tabels_1=image_box_tabels_1.astype(np.uint8) - image_box_tabels_1 = cv2.dilate(image_box_tabels_1,KERNEL,iterations = 5) + image_box_tabels_1 = image_box_tabels_1.astype(np.uint8) + image_box_tabels_1 = cv2.dilate(image_box_tabels_1, KERNEL, iterations=5) - contours_table_m_text,_=return_contours_of_image(image_box_tabels_and_m_text) - image_box_tabels=np.repeat(image_box_tabels_1[:, :, np.newaxis], 3, axis=2) + contours_table_m_text, _ = return_contours_of_image(image_box_tabels_and_m_text) + image_box_tabels = np.repeat(image_box_tabels_1[:, :, np.newaxis], 3, axis=2) - image_box_tabels=image_box_tabels.astype(np.uint8) + image_box_tabels = image_box_tabels.astype(np.uint8) imgray = cv2.cvtColor(image_box_tabels, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_line,hierachy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + contours_line, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - y_min_main_line ,y_max_main_line=find_features_of_contours(contours_line) - y_min_main_tab ,y_max_main_tab=find_features_of_contours(contours_tab) + y_min_main_line, y_max_main_line = find_features_of_contours(contours_line) + y_min_main_tab, y_max_main_tab = find_features_of_contours(contours_tab) - cx_tab_m_text,cy_tab_m_text ,x_min_tab_m_text , x_max_tab_m_text, y_min_tab_m_text ,y_max_tab_m_text, _= find_new_features_of_contours(contours_table_m_text) - cx_tabl,cy_tabl ,x_min_tabl , x_max_tabl, y_min_tabl ,y_max_tabl,_= find_new_features_of_contours(contours_tab) + cx_tab_m_text, cy_tab_m_text, x_min_tab_m_text, x_max_tab_m_text, y_min_tab_m_text, y_max_tab_m_text, _ = find_new_features_of_contours( + contours_table_m_text) + cx_tabl, cy_tabl, x_min_tabl, x_max_tabl, y_min_tabl, y_max_tabl, _ = find_new_features_of_contours( + contours_tab) - if len(y_min_main_tab )>0: - y_down_tabs=[] - y_up_tabs=[] + if len(y_min_main_tab) > 0: + y_down_tabs = [] + y_up_tabs = [] - for i_t in range(len(y_min_main_tab )): - y_down_tab=[] - y_up_tab=[] + for i_t in range(len(y_min_main_tab)): + y_down_tab = [] + y_up_tab = [] for i_l in range(len(y_min_main_line)): - if y_min_main_tab[i_t]>y_min_main_line[i_l] and y_max_main_tab[i_t]>y_min_main_line[i_l] and y_min_main_tab[i_t]>y_max_main_line[i_l] and y_max_main_tab[i_t]>y_min_main_line[i_l]: + if y_min_main_tab[i_t] > y_min_main_line[i_l] and y_max_main_tab[i_t] > y_min_main_line[ + i_l] and y_min_main_tab[i_t] > y_max_main_line[i_l] and y_max_main_tab[i_t] > \ + y_min_main_line[i_l]: pass - elif y_min_main_tab[i_t]0: + + _, _, _, _, y_min_tab_col1, y_max_tab_col1, _ = find_new_features_of_contours(contours_table_col1) + + if len(y_min_tab_col1) > 0: for ijv in range(len(y_min_tab_col1)): - image_revised_last[int(y_min_tab_col1[ijv]):int(y_max_tab_col1[ijv]),:,:]=pixel_table + image_revised_last[int(y_min_tab_col1[ijv]):int(y_max_tab_col1[ijv]), :, :] = pixel_table return image_revised_last def do_order_of_regions(self, *args, **kwargs): if self.full_layout: return self.do_order_of_regions_full_layout(*args, **kwargs) return self.do_order_of_regions_no_full_layout(*args, **kwargs) - + def get_tables_from_model(self, img, num_col_classifier): img_org = np.copy(img) img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] - + if not self.dir_in: self.model_table, _ = self.start_new_session_and_model(self.model_table_dir) - + patches = False if self.light_version: prediction_table = self.do_prediction_new_concept(patches, img, self.model_table) prediction_table = prediction_table.astype(np.int16) - return prediction_table[:,:,0] + return prediction_table[:, :, 0] else: if 4 > num_col_classifier > 2: prediction_table = self.do_prediction(patches, img, self.model_table) - pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_table) + pre_updown = self.do_prediction(patches, cv2.flip(img[:, :, :], -1), self.model_table) pre_updown = cv2.flip(pre_updown, -1) - - prediction_table[:,:,0][pre_updown[:,:,0]==1]=1 + + prediction_table[:, :, 0][pre_updown[:, :, 0] == 1] = 1 prediction_table = prediction_table.astype(np.int16) - - elif num_col_classifier ==2: - height_ext = 0 # img.shape[0] // 4 + + elif num_col_classifier == 2: + height_ext = 0 # img.shape[0] // 4 h_start = height_ext // 2 width_ext = img.shape[1] // 8 w_start = width_ext // 2 - + img_new = np.zeros((img.shape[0] + height_ext, img.shape[1] + width_ext, img.shape[2])).astype(float) @@ -2757,63 +2776,64 @@ class Eynollah: img_new[ys, xs] = img prediction_ext = self.do_prediction(patches, img_new, self.model_table) - pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_table) + pre_updown = self.do_prediction(patches, cv2.flip(img_new[:, :, :], -1), self.model_table) pre_updown = cv2.flip(pre_updown, -1) - + prediction_table = prediction_ext[ys, xs] prediction_table_updown = pre_updown[ys, xs] - - prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 + + prediction_table[:, :, 0][prediction_table_updown[:, :, 0] == 1] = 1 prediction_table = prediction_table.astype(np.int16) - elif num_col_classifier ==1: - height_ext = 0 # img.shape[0] // 4 + elif num_col_classifier == 1: + height_ext = 0 # img.shape[0] // 4 h_start = height_ext // 2 width_ext = img.shape[1] // 4 w_start = width_ext // 2 - - img_new =np.zeros((img.shape[0] + height_ext, - img.shape[1] + width_ext, - img.shape[2])).astype(float) + + img_new = np.zeros((img.shape[0] + height_ext, + img.shape[1] + width_ext, + img.shape[2])).astype(float) ys = slice(h_start, h_start + img.shape[0]) xs = slice(w_start, w_start + img.shape[1]) img_new[ys, xs] = img prediction_ext = self.do_prediction(patches, img_new, self.model_table) - pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_table) + pre_updown = self.do_prediction(patches, cv2.flip(img_new[:, :, :], -1), self.model_table) pre_updown = cv2.flip(pre_updown, -1) - + prediction_table = prediction_ext[ys, xs] prediction_table_updown = pre_updown[ys, xs] - - prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 + + prediction_table[:, :, 0][prediction_table_updown[:, :, 0] == 1] = 1 prediction_table = prediction_table.astype(np.int16) else: prediction_table = np.zeros(img.shape) img_w_half = img.shape[1] // 2 - pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], self.model_table) - pre2 = self.do_prediction(patches, img[:,img_w_half:,:], self.model_table) - pre_full = self.do_prediction(patches, img[:,:,:], self.model_table) - pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_table) + pre1 = self.do_prediction(patches, img[:, 0:img_w_half, :], self.model_table) + pre2 = self.do_prediction(patches, img[:, img_w_half:, :], self.model_table) + pre_full = self.do_prediction(patches, img[:, :, :], self.model_table) + pre_updown = self.do_prediction(patches, cv2.flip(img[:, :, :], -1), self.model_table) pre_updown = cv2.flip(pre_updown, -1) - - prediction_table_full_erode = cv2.erode(pre_full[:,:,0], KERNEL, iterations=4) + + prediction_table_full_erode = cv2.erode(pre_full[:, :, 0], KERNEL, iterations=4) prediction_table_full_erode = cv2.dilate(prediction_table_full_erode, KERNEL, iterations=4) - - prediction_table_full_updown_erode = cv2.erode(pre_updown[:,:,0], KERNEL, iterations=4) - prediction_table_full_updown_erode = cv2.dilate(prediction_table_full_updown_erode, KERNEL, iterations=4) - prediction_table[:,0:img_w_half,:] = pre1[:,:,:] - prediction_table[:,img_w_half:,:] = pre2[:,:,:] - - prediction_table[:,:,0][prediction_table_full_erode[:,:]==1]=1 - prediction_table[:,:,0][prediction_table_full_updown_erode[:,:]==1]=1 + prediction_table_full_updown_erode = cv2.erode(pre_updown[:, :, 0], KERNEL, iterations=4) + prediction_table_full_updown_erode = cv2.dilate(prediction_table_full_updown_erode, KERNEL, + iterations=4) + + prediction_table[:, 0:img_w_half, :] = pre1[:, :, :] + prediction_table[:, img_w_half:, :] = pre2[:, :, :] + + prediction_table[:, :, 0][prediction_table_full_erode[:, :] == 1] = 1 + prediction_table[:, :, 0][prediction_table_full_updown_erode[:, :] == 1] = 1 prediction_table = prediction_table.astype(np.int16) - + #prediction_table_erode = cv2.erode(prediction_table[:,:,0], self.kernel, iterations=6) #prediction_table_erode = cv2.dilate(prediction_table_erode, self.kernel, iterations=6) - - prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20) + + prediction_table_erode = cv2.erode(prediction_table[:, :, 0], KERNEL, iterations=20) prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20) return prediction_table_erode.astype(np.int16) @@ -2838,14 +2858,14 @@ class Eynollah: table_prediction = self.get_tables_from_model(image_page, num_col_classifier) else: table_prediction = np.zeros((image_page.shape[0], image_page.shape[1])).astype(np.int16) - + if self.plotter: self.plotter.save_page_image(image_page) - text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - + text_regions_p_1 = text_regions_p_1[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]] + textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]] + img_bin_light = img_bin_light[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]] + mask_images = (text_regions_p_1[:, :] == 2) * 1 mask_images = mask_images.astype(np.uint8) mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10) @@ -2853,13 +2873,13 @@ class Eynollah: mask_lines = mask_lines.astype(np.uint8) img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) - + #print("inside graphics 2 ", time.time() - t_in_gr) if erosion_hurts: - img_only_regions = np.copy(img_only_regions_with_sep[:,:]) + img_only_regions = np.copy(img_only_regions_with_sep[:, :]) else: - img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6) - + img_only_regions = cv2.erode(img_only_regions_with_sep[:, :], KERNEL, iterations=6) + ##print(img_only_regions.shape,'img_only_regions') ##plt.imshow(img_only_regions[:,:]) ##plt.show() @@ -2875,7 +2895,7 @@ class Eynollah: #print("inside graphics 3 ", time.time() - t_in_gr) return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light) - + def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, img_bin_light): #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') #print(erosion_hurts, 'erosion_hurts') @@ -2890,11 +2910,11 @@ class Eynollah: image_page, page_coord, cont_page = self.extract_page() #print("inside graphics 1 ", time.time() - t_in_gr) - - textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - - return page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page + + textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]] + img_bin_light = img_bin_light[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]] + + return page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page def run_graphics_and_columns( self, text_regions_p_1, @@ -2910,16 +2930,16 @@ class Eynollah: img_g3[:, :, 2] = img_g[:, :] image_page, page_coord, cont_page = self.extract_page() - + if self.tables: table_prediction = self.get_tables_from_model(image_page, num_col_classifier) else: table_prediction = np.zeros((image_page.shape[0], image_page.shape[1])).astype(np.int16) - + if self.plotter: self.plotter.save_page_image(image_page) - text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + text_regions_p_1 = text_regions_p_1[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]] mask_images = (text_regions_p_1[:, :] == 2) * 1 mask_images = mask_images.astype(np.uint8) mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10) @@ -2927,11 +2947,11 @@ class Eynollah: mask_lines = mask_lines.astype(np.uint8) img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) - + if erosion_hurts: - img_only_regions = np.copy(img_only_regions_with_sep[:,:]) + img_only_regions = np.copy(img_only_regions_with_sep[:, :]) else: - img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6) + img_only_regions = cv2.erode(img_only_regions_with_sep[:, :], KERNEL, iterations=6) try: num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) num_col = num_col + 1 @@ -2964,16 +2984,18 @@ class Eynollah: else: self.get_image_and_scales(img_org, img_res, scale) if self.allow_scaling: - img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, img_bin) + img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, + img_bin) self.get_image_and_scales_after_enhancing(img_org, img_res) #print("enhancement in ", time.time()-t_in) return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified def run_textline(self, image_page, num_col_classifier=None): - scaler_h_textline = 1#1.3 # 1.2#1.2 - scaler_w_textline = 1#1.3 # 0.9#1 + scaler_h_textline = 1 #1.3 # 1.2#1.2 + scaler_w_textline = 1 #1.3 # 0.9#1 #print(image_page.shape) - textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier) + textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, + num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) @@ -3007,7 +3029,7 @@ class Eynollah: try: regions_without_separators = (text_regions_p[:, :] == 1) * 1 if self.tables: - regions_without_separators[table_prediction==1] = 1 + regions_without_separators[table_prediction == 1] = 1 regions_without_separators = regions_without_separators.astype(np.uint8) text_regions_p = get_marginals( rotate_image(regions_without_separators, slope_deskew), text_regions_p, @@ -3034,11 +3056,12 @@ class Eynollah: table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], text_regions_p.shape[1]) regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1 if self.tables: - regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 - regions_without_separators = (text_regions_p[:, :] == 1) * 1 # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) + regions_without_separators_d[table_prediction_n[:, :] == 1] = 1 + regions_without_separators = (text_regions_p[:, + :] == 1) * 1 # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) #print(time.time()-t_0_box,'time box in 1') if self.tables: - regions_without_separators[table_prediction ==1 ] = 1 + regions_without_separators[table_prediction == 1] = 1 if np.abs(slope_deskew) < SLOPE_THRESHOLD: text_regions_p_1_n = None textline_mask_tot_d = None @@ -3072,17 +3095,17 @@ class Eynollah: boxes_d = None self.logger.debug("len(boxes): %s", len(boxes)) #print(time.time()-t_0_box,'time box in 3.1') - + if self.tables: if self.light_version: pass else: text_regions_p_tables = np.copy(text_regions_p) - text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 + text_regions_p_tables[:, :][(table_prediction[:, :] == 1)] = 10 pixel_line = 3 img_revised_tab2 = self.add_tables_heuristic_to_layout( text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables, - num_col_classifier , 0.000005, pixel_line) + num_col_classifier, 0.000005, pixel_line) #print(time.time()-t_0_box,'time box in 3.2') img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables( img_revised_tab2, table_prediction, 10, num_col_classifier) @@ -3093,46 +3116,49 @@ class Eynollah: num_col_classifier, erosion_hurts, self.tables, self.right2left) boxes = None self.logger.debug("len(boxes): %s", len(boxes_d)) - + if self.tables: if self.light_version: pass else: text_regions_p_tables = np.copy(text_regions_p_1_n) - text_regions_p_tables =np.round(text_regions_p_tables) - text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 - + text_regions_p_tables = np.round(text_regions_p_tables) + text_regions_p_tables[:, :][ + (text_regions_p_tables[:, :] != 3) & (table_prediction_n[:, :] == 1)] = 10 + pixel_line = 3 img_revised_tab2 = self.add_tables_heuristic_to_layout( - text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables, + text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, + text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) - img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables( + img_revised_tab2_d, _ = self.check_iou_of_bounding_box_and_contour_for_tables( img_revised_tab2, table_prediction_n, 10, num_col_classifier) - + img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) - img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) + img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], + text_regions_p.shape[1]) #print(time.time()-t_0_box,'time box in 4') self.logger.info("detecting boxes took %.1fs", time.time() - t1) - + if self.tables: if self.light_version: - text_regions_p[:,:][table_prediction[:,:]==1] = 10 - img_revised_tab=text_regions_p[:,:] + text_regions_p[:, :][table_prediction[:, :] == 1] = 10 + img_revised_tab = text_regions_p[:, :] else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - img_revised_tab = np.copy(img_revised_tab2[:,:,0]) - img_revised_tab[:,:][(text_regions_p[:,:] == 1) & (img_revised_tab[:,:] != 10)] = 1 + img_revised_tab = np.copy(img_revised_tab2[:, :, 0]) + img_revised_tab[:, :][(text_regions_p[:, :] == 1) & (img_revised_tab[:, :] != 10)] = 1 else: - img_revised_tab = np.copy(text_regions_p[:,:]) - img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0 - img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10 - - text_regions_p[:,:][text_regions_p[:,:]==10] = 0 - text_regions_p[:,:][img_revised_tab[:,:]==10] = 10 + img_revised_tab = np.copy(text_regions_p[:, :]) + img_revised_tab[:, :][img_revised_tab[:, :] == 10] = 0 + img_revised_tab[:, :][img_revised_tab2_d_rotated[:, :, 0] == 10] = 10 + + text_regions_p[:, :][text_regions_p[:, :] == 10] = 0 + text_regions_p[:, :][img_revised_tab[:, :] == 10] = 10 else: - img_revised_tab=text_regions_p[:,:] + img_revised_tab = text_regions_p[:, :] #img_revised_tab = text_regions_p[:, :] if self.light_version: polygons_of_images = return_contours_of_interested_region(text_regions_p, 2) @@ -3142,14 +3168,14 @@ class Eynollah: pixel_img = 4 min_area_mar = 0.00001 if self.light_version: - marginal_mask = (text_regions_p[:,:]==pixel_img)*1 + marginal_mask = (text_regions_p[:, :] == pixel_img) * 1 marginal_mask = marginal_mask.astype('uint8') marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2) - + polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar) else: polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) - + pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) #print(time.time()-t_0_box,'time box in 5') @@ -3167,38 +3193,46 @@ class Eynollah: t_full0 = time.time() if self.tables: if self.light_version: - text_regions_p[:,:][table_prediction[:,:]==1] = 10 - img_revised_tab = text_regions_p[:,:] + text_regions_p[:, :][table_prediction[:, :] == 1] = 10 + img_revised_tab = text_regions_p[:, :] if np.abs(slope_deskew) >= SLOPE_THRESHOLD: image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ - rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) - - text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) - textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) - table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) - - regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1 - regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 + rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, + slope_deskew) + + text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], + text_regions_p.shape[1]) + textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], + text_regions_p.shape[1]) + table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], + text_regions_p.shape[1]) + + regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1 + regions_without_separators_d[table_prediction_n[:, :] == 1] = 1 else: text_regions_p_1_n = None textline_mask_tot_d = None regions_without_separators_d = None # regions_without_separators = ( text_regions_p[:,:]==1 | text_regions_p[:,:]==2 )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) - regions_without_separators = (text_regions_p[:,:] == 1)*1 + regions_without_separators = (text_regions_p[:, :] == 1) * 1 regions_without_separators[table_prediction == 1] = 1 else: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ - rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) - - text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) - textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) - table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) - - regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1 - regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 + rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, + slope_deskew) + + text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], + text_regions_p.shape[1]) + textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], + text_regions_p.shape[1]) + table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], + text_regions_p.shape[1]) + + regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1 + regions_without_separators_d[table_prediction_n[:, :] == 1] = 1 else: text_regions_p_1_n = None textline_mask_tot_d = None @@ -3206,43 +3240,44 @@ class Eynollah: # regions_without_separators = ( text_regions_p[:,:]==1 | text_regions_p[:,:]==2 )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) - regions_without_separators = (text_regions_p[:,:] == 1)*1 + regions_without_separators = (text_regions_p[:, :] == 1) * 1 regions_without_separators[table_prediction == 1] = 1 - - pixel_lines=3 + + pixel_lines = 3 if np.abs(slope_deskew) < SLOPE_THRESHOLD: num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: num_col_d, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - if num_col_classifier>=3: + if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:,:], KERNEL, iterations=6) - + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:,:], KERNEL, iterations=6) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, + iterations=6) else: pass - + if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) text_regions_p_tables = np.copy(text_regions_p) - text_regions_p_tables[:,:][(table_prediction[:,:]==1)] = 10 + text_regions_p_tables[:, :][(table_prediction[:, :] == 1)] = 10 pixel_line = 3 img_revised_tab2 = self.add_tables_heuristic_to_layout( text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables, - num_col_classifier , 0.000005, pixel_line) - - img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables( + num_col_classifier, 0.000005, pixel_line) + + img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables( img_revised_tab2, table_prediction, 10, num_col_classifier) else: boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new( @@ -3250,50 +3285,53 @@ class Eynollah: num_col_classifier, erosion_hurts, self.tables, self.right2left) text_regions_p_tables = np.copy(text_regions_p_1_n) text_regions_p_tables = np.round(text_regions_p_tables) - text_regions_p_tables[:,:][(text_regions_p_tables[:,:]!=3) & (table_prediction_n[:,:]==1)] = 10 - + text_regions_p_tables[:, :][ + (text_regions_p_tables[:, :] != 3) & (table_prediction_n[:, :] == 1)] = 10 + pixel_line = 3 img_revised_tab2 = self.add_tables_heuristic_to_layout( - text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables, + text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, + text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) - - img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables( + + img_revised_tab2_d, _ = self.check_iou_of_bounding_box_and_contour_for_tables( img_revised_tab2, table_prediction_n, 10, num_col_classifier) img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) - img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) + img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], + text_regions_p.shape[1]) if np.abs(slope_deskew) < 0.13: - img_revised_tab = np.copy(img_revised_tab2[:,:,0]) + img_revised_tab = np.copy(img_revised_tab2[:, :, 0]) else: - img_revised_tab = np.copy(text_regions_p[:,:]) - img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0 - img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10 - + img_revised_tab = np.copy(text_regions_p[:, :]) + img_revised_tab[:, :][img_revised_tab[:, :] == 10] = 0 + img_revised_tab[:, :][img_revised_tab2_d_rotated[:, :, 0] == 10] = 10 + ##img_revised_tab=img_revised_tab2[:,:,0] #img_revised_tab=text_regions_p[:,:] - text_regions_p[:,:][text_regions_p[:,:]==10] = 0 - text_regions_p[:,:][img_revised_tab[:,:]==10] = 10 + text_regions_p[:, :][text_regions_p[:, :] == 10] = 0 + text_regions_p[:, :][img_revised_tab[:, :] == 10] = 10 #img_revised_tab[img_revised_tab2[:,:,0]==10] =10 - + pixel_img = 4 min_area_mar = 0.00001 - + if self.light_version: - marginal_mask = (text_regions_p[:,:]==pixel_img)*1 + marginal_mask = (text_regions_p[:, :] == pixel_img) * 1 marginal_mask = marginal_mask.astype('uint8') marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2) - + polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar) else: polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) - + pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) - + # set first model with second model text_regions_p[:, :][text_regions_p[:, :] == 2] = 5 text_regions_p[:, :][text_regions_p[:, :] == 3] = 6 @@ -3308,30 +3346,30 @@ class Eynollah: # 6 is the separators lable in old full layout model # 4 is the drop capital class in old full layout model # in the new full layout drop capital is 3 and separators are 5 - - text_regions_p[:,:][regions_fully[:,:,0]==5]=6 + + text_regions_p[:, :][regions_fully[:, :, 0] == 5] = 6 ###regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4 - + #text_regions_p[:,:][regions_fully[:,:,0]==6]=6 ##regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) ##regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 drop_capital_label_in_full_layout_model = 3 - - drops = (regions_fully[:,:,0]==drop_capital_label_in_full_layout_model)*1 - drops= drops.astype(np.uint8) - - regions_fully[:,:,0][regions_fully[:,:,0]==drop_capital_label_in_full_layout_model] = 1 - - drops = cv2.erode(drops[:,:], KERNEL, iterations=1) - regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model - + + drops = (regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model) * 1 + drops = drops.astype(np.uint8) + + regions_fully[:, :, 0][regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model] = 1 + + drops = cv2.erode(drops[:, :], KERNEL, iterations=1) + regions_fully[:, :, 0][drops[:, :] == 1] = drop_capital_label_in_full_layout_model + regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout( regions_fully, drop_capital_label_in_full_layout_model, text_regions_p) ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) ##if num_col_classifier > 2: - ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 + ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 ##else: - ##regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p) + ##regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p) ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) # plt.imshow(regions_fully[:,:,0]) @@ -3358,13 +3396,13 @@ class Eynollah: regions_without_separators = (text_regions_p[:, :] == 1) * 1 img_revised_tab = np.copy(text_regions_p[:, :]) polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5) - + self.logger.debug('exit run_boxes_full_layout') #print("full inside 3", time.time()- t_full0) return (polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables) - + def our_load_model(self, model_file): try: model = load_model(model_file, compile=False) @@ -3377,19 +3415,19 @@ class Eynollah: y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] - img_poly = np.zeros((y_len,x_len), dtype='uint8') - img_poly[text_regions_p[:,:]==1] = 1 - img_poly[text_regions_p[:,:]==2] = 2 - img_poly[text_regions_p[:,:]==3] = 4 - img_poly[text_regions_p[:,:]==6] = 5 + img_poly = np.zeros((y_len, x_len), dtype='uint8') + img_poly[text_regions_p[:, :] == 1] = 1 + img_poly[text_regions_p[:, :] == 2] = 2 + img_poly[text_regions_p[:, :] == 3] = 4 + img_poly[text_regions_p[:, :] == 6] = 5 - img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') + img_header_and_sep = np.zeros((y_len, x_len), dtype='uint8') if contours_only_text_parent_h: _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours( contours_only_text_parent_h) for j in range(len(cy_main)): - img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12, - int(x_min_main[j]):int(x_max_main[j])] = 1 + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j]) + 12, + int(x_min_main[j]):int(x_max_main[j])] = 1 co_text_all = contours_only_text_parent + contours_only_text_parent_h else: co_text_all = contours_only_text_parent @@ -3399,18 +3437,18 @@ class Eynollah: labels_con = np.zeros((y_len, x_len, len(co_text_all)), dtype=bool) for i in range(len(co_text_all)): - img = labels_con[:,:,i].astype(np.uint8) + img = labels_con[:, :, i].astype(np.uint8) cv2.fillPoly(img, pts=[co_text_all[i]], color=(1,)) - labels_con[:,:,i] = img + labels_con[:, :, i] = img - height1 =672#448 - width1 = 448#224 + height1 = 672 #448 + width1 = 448 #224 - height2 =672#448 - width2= 448#224 + height2 = 672 #448 + width2 = 448 #224 - height3 =672#448 - width3 = 448#224 + height3 = 672 #448 + width3 = 448 #224 labels_con = resize_image(labels_con.astype(np.uint8), height1, width1).astype(bool) img_header_and_sep = resize_image(img_header_and_sep, height1, width1) @@ -3424,7 +3462,7 @@ class Eynollah: ordered = [list(range(len(co_text_all)))] index_update = 0 #print(labels_con.shape[2],"number of regions for reading order") - while index_update>=0: + while index_update >= 0: ij_list = ordered.pop(index_update) i = ij_list.pop(0) @@ -3433,12 +3471,12 @@ class Eynollah: tot_counter = 0 batch = [] for j in ij_list: - img1 = labels_con[:,:,i].astype(float) - img2 = labels_con[:,:,j].astype(float) - img1[img_poly==5] = 2 - img2[img_poly==5] = 2 - img1[img_header_and_sep==1] = 3 - img2[img_header_and_sep==1] = 3 + img1 = labels_con[:, :, i].astype(float) + img2 = labels_con[:, :, j].astype(float) + img1[img_poly == 5] = 2 + img2[img_poly == 5] = 2 + img1[img_header_and_sep == 1] = 3 + img2[img_header_and_sep == 1] = 3 input_1[len(batch), :, :, 0] = img1 / 3. input_1[len(batch), :, :, 2] = img2 / 3. @@ -3447,9 +3485,9 @@ class Eynollah: tot_counter += 1 batch.append(j) if tot_counter % inference_bs == 0 or tot_counter == len(ij_list): - y_pr = self.model_reading_order.predict(input_1 , verbose=0) + y_pr = self.model_reading_order.predict(input_1, verbose=0) for jb, j in enumerate(batch): - if y_pr[jb][0]>=0.5: + if y_pr[jb][0] >= 0.5: post_list.append(j) else: ante_list.append(j) @@ -3475,38 +3513,38 @@ class Eynollah: def return_start_and_end_of_common_text_of_textline_ocr(self, textline_image, ind_tot): width = np.shape(textline_image)[1] height = np.shape(textline_image)[0] - common_window = int(0.2*width) + common_window = int(0.2 * width) + + width1 = int(width / 2. - common_window) + width2 = int(width / 2. + common_window) - width1 = int ( width/2. - common_window ) - width2 = int ( width/2. + common_window ) - - img_sum = np.sum(textline_image[:,:,0], axis=0) + img_sum = np.sum(textline_image[:, :, 0], axis=0) sum_smoothed = gaussian_filter1d(img_sum, 3) - + peaks_real, _ = find_peaks(sum_smoothed, height=0) - if len(peaks_real)>70: + if len(peaks_real) > 70: print(len(peaks_real), 'len(peaks_real)') - peaks_real = peaks_real[(peaks_realwidth1)] + peaks_real = peaks_real[(peaks_real < width2) & (peaks_real > width1)] arg_sort = np.argsort(sum_smoothed[peaks_real]) - arg_sort4 =arg_sort[::-1][:4] + arg_sort4 = arg_sort[::-1][:4] peaks_sort_4 = peaks_real[arg_sort][::-1][:4] argsort_sorted = np.argsort(peaks_sort_4) first_4_sorted = peaks_sort_4[argsort_sorted] y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]] #print(first_4_sorted,'first_4_sorted') - + arg_sortnew = np.argsort(y_4_sorted) - peaks_final =np.sort( first_4_sorted[arg_sortnew][2:] ) - + peaks_final = np.sort(first_4_sorted[arg_sortnew][2:]) + #plt.figure(ind_tot) #plt.imshow(textline_image) #plt.plot([peaks_final[0], peaks_final[0]], [0, height-1]) #plt.plot([peaks_final[1], peaks_final[1]], [0, height-1]) #plt.savefig('./'+str(ind_tot)+'.png') - + return peaks_final[0], peaks_final[1] else: pass @@ -3514,29 +3552,29 @@ class Eynollah: def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image, ind_tot): width = np.shape(textline_image)[1] height = np.shape(textline_image)[0] - common_window = int(0.06*width) + common_window = int(0.06 * width) + + width1 = int(width / 2. - common_window) + width2 = int(width / 2. + common_window) - width1 = int ( width/2. - common_window ) - width2 = int ( width/2. + common_window ) - - img_sum = np.sum(textline_image[:,:,0], axis=0) + img_sum = np.sum(textline_image[:, :, 0], axis=0) sum_smoothed = gaussian_filter1d(img_sum, 3) - + peaks_real, _ = find_peaks(sum_smoothed, height=0) - if len(peaks_real)>70: + if len(peaks_real) > 70: #print(len(peaks_real), 'len(peaks_real)') - peaks_real = peaks_real[(peaks_realwidth1)] + peaks_real = peaks_real[(peaks_real < width2) & (peaks_real > width1)] arg_max = np.argmax(sum_smoothed[peaks_real]) peaks_final = peaks_real[arg_max] - + #plt.figure(ind_tot) #plt.imshow(textline_image) #plt.plot([peaks_final, peaks_final], [0, height-1]) ##plt.plot([peaks_final[1], peaks_final[1]], [0, height-1]) #plt.savefig('./'+str(ind_tot)+'.png') - + return peaks_final else: return None @@ -3544,52 +3582,52 @@ class Eynollah: def return_start_and_end_of_common_text_of_textline_ocr_new_splitted( self, peaks_real, sum_smoothed, start_split, end_split): - peaks_real = peaks_real[(peaks_realstart_split)] + peaks_real = peaks_real[(peaks_real < end_split) & (peaks_real > start_split)] arg_sort = np.argsort(sum_smoothed[peaks_real]) - arg_sort4 =arg_sort[::-1][:4] + arg_sort4 = arg_sort[::-1][:4] peaks_sort_4 = peaks_real[arg_sort][::-1][:4] argsort_sorted = np.argsort(peaks_sort_4) first_4_sorted = peaks_sort_4[argsort_sorted] y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]] #print(first_4_sorted,'first_4_sorted') - + arg_sortnew = np.argsort(y_4_sorted) - peaks_final =np.sort( first_4_sorted[arg_sortnew][3:] ) + peaks_final = np.sort(first_4_sorted[arg_sortnew][3:]) return peaks_final[0] def return_start_and_end_of_common_text_of_textline_ocr_new(self, textline_image, ind_tot): width = np.shape(textline_image)[1] height = np.shape(textline_image)[0] - common_window = int(0.15*width) + common_window = int(0.15 * width) - width1 = int ( width/2. - common_window ) - width2 = int ( width/2. + common_window ) - mid = int(width/2.) - - img_sum = np.sum(textline_image[:,:,0], axis=0) + width1 = int(width / 2. - common_window) + width2 = int(width / 2. + common_window) + mid = int(width / 2.) + + img_sum = np.sum(textline_image[:, :, 0], axis=0) sum_smoothed = gaussian_filter1d(img_sum, 3) - + peaks_real, _ = find_peaks(sum_smoothed, height=0) - if len(peaks_real)>70: + if len(peaks_real) > 70: peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( - peaks_real, sum_smoothed, width1, mid+2) + peaks_real, sum_smoothed, width1, mid + 2) peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( - peaks_real, sum_smoothed, mid-2, width2) - + peaks_real, sum_smoothed, mid - 2, width2) + #plt.figure(ind_tot) #plt.imshow(textline_image) #plt.plot([peak_start, peak_start], [0, height-1]) #plt.plot([peak_end, peak_end], [0, height-1]) #plt.savefig('./'+str(ind_tot)+'.png') - + return peak_start, peak_end else: pass def return_ocr_of_textline_without_common_section( - self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio, ind_tot): if h2w_ratio > 0.05: pixel_values = processor(textline_image, return_tensors="pt").pixel_values @@ -3601,31 +3639,31 @@ class Eynollah: #common_window = int(0.3*width) #width1 = int ( width/2. - common_window ) #width2 = int ( width/2. + common_window ) - + split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section( textline_image, ind_tot) if split_point: - image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) - image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) - + image1 = textline_image[:, :split_point, :] # image.crop((0, 0, width2, height)) + image2 = textline_image[:, split_point:, :] #image.crop((width1, 0, width, height)) + #pixel_values1 = processor(image1, return_tensors="pt").pixel_values #pixel_values2 = processor(image2, return_tensors="pt").pixel_values - - pixel_values_merged = processor([image1,image2], return_tensors="pt").pixel_values + + pixel_values_merged = processor([image1, image2], return_tensors="pt").pixel_values generated_ids_merged = model_ocr.generate(pixel_values_merged.to(device)) generated_text_merged = processor.batch_decode(generated_ids_merged, skip_special_tokens=True) - + #print(generated_text_merged,'generated_text_merged') - + #generated_ids1 = model_ocr.generate(pixel_values1.to(device)) #generated_ids2 = model_ocr.generate(pixel_values2.to(device)) - + #generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] #generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] - + #generated_text = generated_text1 + ' ' + generated_text2 generated_text = generated_text_merged[0] + ' ' + generated_text_merged[1] - + #print(generated_text1,'generated_text1') #print(generated_text2, 'generated_text2') #print('########################################') @@ -3633,13 +3671,13 @@ class Eynollah: pixel_values = processor(textline_image, return_tensors="pt").pixel_values generated_ids = model_ocr.generate(pixel_values.to(device)) generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] - + #print(generated_text,'generated_text') #print('########################################') return generated_text def return_ocr_of_textline( - self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio, ind_tot): if h2w_ratio > 0.05: pixel_values = processor(textline_image, return_tensors="pt").pixel_values @@ -3651,66 +3689,66 @@ class Eynollah: #common_window = int(0.3*width) #width1 = int ( width/2. - common_window ) #width2 = int ( width/2. + common_window ) - + try: width1, width2 = self.return_start_and_end_of_common_text_of_textline_ocr_new(textline_image, ind_tot) - - image1 = textline_image[:, :width2,:]# image.crop((0, 0, width2, height)) - image2 = textline_image[:, width1:,:]#image.crop((width1, 0, width, height)) - + + image1 = textline_image[:, :width2, :] # image.crop((0, 0, width2, height)) + image2 = textline_image[:, width1:, :] #image.crop((width1, 0, width, height)) + pixel_values1 = processor(image1, return_tensors="pt").pixel_values pixel_values2 = processor(image2, return_tensors="pt").pixel_values - + generated_ids1 = model_ocr.generate(pixel_values1.to(device)) generated_ids2 = model_ocr.generate(pixel_values2.to(device)) - + generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] #print(generated_text1,'generated_text1') #print(generated_text2, 'generated_text2') #print('########################################') - + match = sq(None, generated_text1, generated_text2).find_longest_match( 0, len(generated_text1), 0, len(generated_text2)) - generated_text = generated_text1 + generated_text2[match.b+match.size:] + generated_text = generated_text1 + generated_text2[match.b + match.size:] except: pixel_values = processor(textline_image, return_tensors="pt").pixel_values generated_ids = model_ocr.generate(pixel_values.to(device)) generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] - + return generated_text - def return_textline_contour_with_added_box_coordinate(self, textline_contour, box_ind): - textline_contour[:,0] = textline_contour[:,0] + box_ind[2] - textline_contour[:,1] = textline_contour[:,1] + box_ind[0] + def return_textline_contour_with_added_box_coordinate(self, textline_contour, box_ind): + textline_contour[:, 0] = textline_contour[:, 0] + box_ind[2] + textline_contour[:, 1] = textline_contour[:, 1] + box_ind[0] return textline_contour def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] def return_it_in_two_groups(self, x_differential): - split = [ind if x_differential[ind]!=x_differential[ind+1] else -1 - for ind in range(len(x_differential)-1)] - split_masked = list( np.array(split[:])[np.array(split[:])!=-1] ) + split = [ind if x_differential[ind] != x_differential[ind + 1] else -1 + for ind in range(len(x_differential) - 1)] + split_masked = list(np.array(split[:])[np.array(split[:]) != -1]) if 0 not in split_masked: split_masked.insert(0, -1) - split_masked.append(len(x_differential)-1) + split_masked.append(len(x_differential) - 1) - split_masked = np.array(split_masked) +1 + split_masked = np.array(split_masked) + 1 - sums = [np.sum(x_differential[split_masked[ind]:split_masked[ind+1]]) - for ind in range(len(split_masked)-1)] + sums = [np.sum(x_differential[split_masked[ind]:split_masked[ind + 1]]) + for ind in range(len(split_masked) - 1)] - indexes_to_bec_changed = [ind if (np.abs(sums[ind-1]) > np.abs(sums[ind]) and - np.abs(sums[ind+1]) > np.abs(sums[ind])) else -1 - for ind in range(1,len(sums)-1)] - indexes_to_bec_changed_filtered = np.array(indexes_to_bec_changed)[np.array(indexes_to_bec_changed)!=-1] + indexes_to_bec_changed = [ind if (np.abs(sums[ind - 1]) > np.abs(sums[ind]) and + np.abs(sums[ind + 1]) > np.abs(sums[ind])) else -1 + for ind in range(1, len(sums) - 1)] + indexes_to_bec_changed_filtered = np.array(indexes_to_bec_changed)[np.array(indexes_to_bec_changed) != -1] x_differential_new = np.copy(x_differential) for i in indexes_to_bec_changed_filtered: - i_slice = slice(split_masked[i], split_masked[i+1]) + i_slice = slice(split_masked[i], split_masked[i + 1]) x_differential_new[i_slice] = -1 * np.array(x_differential)[i_slice] - + return x_differential_new def dilate_textregions_contours_textline_version(self, all_found_textline_polygons): @@ -3720,101 +3758,103 @@ class Eynollah: con_ind = all_found_textline_polygons[j][ij] area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - + + x_differential = np.diff(con_ind[:, 0, 0]) + y_differential = np.diff(con_ind[:, 0, 1]) + x_differential = gaussian_filter1d(x_differential, 0.1) y_differential = gaussian_filter1d(y_differential, 0.1) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] - y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] - - abs_diff=abs(abs(x_differential)- abs(y_differential) ) - - inc_x = np.zeros(len(x_differential)+1) - inc_y = np.zeros(len(x_differential)+1) - - if (y_max-y_min) <= (x_max-x_min): - dilation_m1 = round(area / (x_max-x_min) * 0.12) + + x_min = float(np.min(con_ind[:, 0, 0])) + y_min = float(np.min(con_ind[:, 0, 1])) + + x_max = float(np.max(con_ind[:, 0, 0])) + y_max = float(np.max(con_ind[:, 0, 1])) + + x_differential_mask_nonzeros = [ind / abs(ind) if ind != 0 else ind for ind in x_differential] + y_differential_mask_nonzeros = [ind / abs(ind) if ind != 0 else ind for ind in y_differential] + + abs_diff = abs(abs(x_differential) - abs(y_differential)) + + inc_x = np.zeros(len(x_differential) + 1) + inc_y = np.zeros(len(x_differential) + 1) + + if (y_max - y_min) <= (x_max - x_min): + dilation_m1 = round(area / (x_max - x_min) * 0.12) else: - dilation_m1 = round(area / (y_max-y_min) * 0.12) - - if dilation_m1>8: + dilation_m1 = round(area / (y_max - y_min) * 0.12) + + if dilation_m1 > 8: dilation_m1 = 8 - if dilation_m1<6: + if dilation_m1 < 6: dilation_m1 = 6 #print(dilation_m1, 'dilation_m1') dilation_m1 = 6 - dilation_m2 = int(dilation_m1/2.) +1 - + dilation_m2 = int(dilation_m1 / 2.) + 1 + for i in range(len(x_differential)): - if abs_diff[i]==0: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - - elif abs_diff[i]!=0 and abs_diff[i]>=3: - if abs(x_differential[i])>abs(y_differential[i]): - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) + if abs_diff[i] == 0: + inc_x[i + 1] = dilation_m2 * (-1 * y_differential_mask_nonzeros[i]) + inc_y[i + 1] = dilation_m2 * (x_differential_mask_nonzeros[i]) + elif abs_diff[i] != 0 and x_differential_mask_nonzeros[i] == 0 and y_differential_mask_nonzeros[ + i] != 0: + inc_x[i + 1] = dilation_m1 * (-1 * y_differential_mask_nonzeros[i]) + elif abs_diff[i] != 0 and x_differential_mask_nonzeros[i] != 0 and y_differential_mask_nonzeros[ + i] == 0: + inc_y[i + 1] = dilation_m1 * (x_differential_mask_nonzeros[i]) + + elif abs_diff[i] != 0 and abs_diff[i] >= 3: + if abs(x_differential[i]) > abs(y_differential[i]): + inc_y[i + 1] = dilation_m1 * (x_differential_mask_nonzeros[i]) else: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) + inc_x[i + 1] = dilation_m1 * (-1 * y_differential_mask_nonzeros[i]) else: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - + inc_x[i + 1] = dilation_m2 * (-1 * y_differential_mask_nonzeros[i]) + inc_y[i + 1] = dilation_m2 * (x_differential_mask_nonzeros[i]) + inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] - - con_scaled = con_ind*1 - - con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] - con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - + + con_scaled = con_ind * 1 + + con_scaled[:, 0, 0] = con_ind[:, 0, 0] + np.array(inc_x)[:] + con_scaled[:, 0, 1] = con_ind[:, 0, 1] + np.array(inc_y)[:] + + con_scaled[:, 0, 1][con_scaled[:, 0, 1] < 0] = 0 + con_scaled[:, 0, 0][con_scaled[:, 0, 0] < 0] = 0 + area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) - + con_ind = con_ind.astype(np.int32) - - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) - for ind in range(len(con_scaled[:,0, 1])) ] + + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind, 0, 0], con_scaled[ind, 0, 1]), False) + for ind in range(len(con_scaled[:, 0, 1]))] results = np.array(results) #print(results,'results') - results[results==0] = 1 - + results[results == 0] = 1 + diff_result = np.diff(results) - - indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] - indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - - if results[0]==1: - con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] - con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] + + indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind] == 2] + indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind] == -2] + + if results[0] == 1: + con_scaled[:indices_m2[0] + 1, 0, 1] = con_ind[:indices_m2[0] + 1, 0, 1] + con_scaled[:indices_m2[0] + 1, 0, 0] = con_ind[:indices_m2[0] + 1, 0, 0] #indices_2 = indices_2[1:] indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): - con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] - con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] + + if len(indices_2) > len(indices_m2): + con_scaled[indices_2[-1] + 1:, 0, 1] = con_ind[indices_2[-1] + 1:, 0, 1] + con_scaled[indices_2[-1] + 1:, 0, 0] = con_ind[indices_2[-1] + 1:, 0, 0] indices_2 = indices_2[:-1] - + for ii in range(len(indices_2)): - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] + con_scaled[indices_2[ii] + 1:indices_m2[ii] + 1, 0, 1] = con_scaled[indices_2[ii], 0, 1] + con_scaled[indices_2[ii] + 1:indices_m2[ii] + 1, 0, 0] = con_scaled[indices_2[ii], 0, 0] - all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] + all_found_textline_polygons[j][ij][:, 0, 1] = con_scaled[:, 0, 1] + all_found_textline_polygons[j][ij][:, 0, 0] = con_scaled[:, 0, 0] return all_found_textline_polygons def dilate_textregions_contours(self, all_found_textline_polygons): @@ -3824,234 +3864,237 @@ class Eynollah: #print(len(con_ind[:,0,0]),'con_ind[:,0,0]') area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - + + x_differential = np.diff(con_ind[:, 0, 0]) + y_differential = np.diff(con_ind[:, 0, 1]) + x_differential = gaussian_filter1d(x_differential, 0.1) y_differential = gaussian_filter1d(y_differential, 0.1) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] - y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] - - abs_diff=abs(abs(x_differential)- abs(y_differential) ) - - inc_x = np.zeros(len(x_differential)+1) - inc_y = np.zeros(len(x_differential)+1) - - if (y_max-y_min) <= (x_max-x_min): - dilation_m1 = round(area / (x_max-x_min) * 0.12) + + x_min = float(np.min(con_ind[:, 0, 0])) + y_min = float(np.min(con_ind[:, 0, 1])) + + x_max = float(np.max(con_ind[:, 0, 0])) + y_max = float(np.max(con_ind[:, 0, 1])) + + x_differential_mask_nonzeros = [ind / abs(ind) if ind != 0 else ind for ind in x_differential] + y_differential_mask_nonzeros = [ind / abs(ind) if ind != 0 else ind for ind in y_differential] + + abs_diff = abs(abs(x_differential) - abs(y_differential)) + + inc_x = np.zeros(len(x_differential) + 1) + inc_y = np.zeros(len(x_differential) + 1) + + if (y_max - y_min) <= (x_max - x_min): + dilation_m1 = round(area / (x_max - x_min) * 0.12) else: - dilation_m1 = round(area / (y_max-y_min) * 0.12) - - if dilation_m1>8: + dilation_m1 = round(area / (y_max - y_min) * 0.12) + + if dilation_m1 > 8: dilation_m1 = 8 - if dilation_m1<6: + if dilation_m1 < 6: dilation_m1 = 6 #print(dilation_m1, 'dilation_m1') dilation_m1 = 6 - dilation_m2 = int(dilation_m1/2.) +1 - + dilation_m2 = int(dilation_m1 / 2.) + 1 + for i in range(len(x_differential)): - if abs_diff[i]==0: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - - elif abs_diff[i]!=0 and abs_diff[i]>=3: - if abs(x_differential[i])>abs(y_differential[i]): - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) + if abs_diff[i] == 0: + inc_x[i + 1] = dilation_m2 * (-1 * y_differential_mask_nonzeros[i]) + inc_y[i + 1] = dilation_m2 * (x_differential_mask_nonzeros[i]) + elif abs_diff[i] != 0 and x_differential_mask_nonzeros[i] == 0 and y_differential_mask_nonzeros[i] != 0: + inc_x[i + 1] = dilation_m1 * (-1 * y_differential_mask_nonzeros[i]) + elif abs_diff[i] != 0 and x_differential_mask_nonzeros[i] != 0 and y_differential_mask_nonzeros[i] == 0: + inc_y[i + 1] = dilation_m1 * (x_differential_mask_nonzeros[i]) + + elif abs_diff[i] != 0 and abs_diff[i] >= 3: + if abs(x_differential[i]) > abs(y_differential[i]): + inc_y[i + 1] = dilation_m1 * (x_differential_mask_nonzeros[i]) else: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) + inc_x[i + 1] = dilation_m1 * (-1 * y_differential_mask_nonzeros[i]) else: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - + inc_x[i + 1] = dilation_m2 * (-1 * y_differential_mask_nonzeros[i]) + inc_y[i + 1] = dilation_m2 * (x_differential_mask_nonzeros[i]) + inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] - - con_scaled = con_ind*1 - - con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] - con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - + + con_scaled = con_ind * 1 + + con_scaled[:, 0, 0] = con_ind[:, 0, 0] + np.array(inc_x)[:] + con_scaled[:, 0, 1] = con_ind[:, 0, 1] + np.array(inc_y)[:] + + con_scaled[:, 0, 1][con_scaled[:, 0, 1] < 0] = 0 + con_scaled[:, 0, 0][con_scaled[:, 0, 0] < 0] = 0 + area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) - + con_ind = con_ind.astype(np.int32) - - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) - for ind in range(len(con_scaled[:,0, 1])) ] + + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind, 0, 0], con_scaled[ind, 0, 1]), False) + for ind in range(len(con_scaled[:, 0, 1]))] results = np.array(results) #print(results,'results') - results[results==0] = 1 - + results[results == 0] = 1 + diff_result = np.diff(results) - indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] - indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] + indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind] == 2] + indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind] == -2] - if results[0]==1: - con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] - con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] + if results[0] == 1: + con_scaled[:indices_m2[0] + 1, 0, 1] = con_ind[:indices_m2[0] + 1, 0, 1] + con_scaled[:indices_m2[0] + 1, 0, 0] = con_ind[:indices_m2[0] + 1, 0, 0] #indices_2 = indices_2[1:] indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): - con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] - con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] + + if len(indices_2) > len(indices_m2): + con_scaled[indices_2[-1] + 1:, 0, 1] = con_ind[indices_2[-1] + 1:, 0, 1] + con_scaled[indices_2[-1] + 1:, 0, 0] = con_ind[indices_2[-1] + 1:, 0, 0] indices_2 = indices_2[:-1] - + for ii in range(len(indices_2)): - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] + con_scaled[indices_2[ii] + 1:indices_m2[ii] + 1, 0, 1] = con_scaled[indices_2[ii], 0, 1] + con_scaled[indices_2[ii] + 1:indices_m2[ii] + 1, 0, 0] = con_scaled[indices_2[ii], 0, 0] - all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] + all_found_textline_polygons[j][:, 0, 1] = con_scaled[:, 0, 1] + all_found_textline_polygons[j][:, 0, 0] = con_scaled[:, 0, 0] return all_found_textline_polygons - + def dilate_textline_contours(self, all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): for ij in range(len(all_found_textline_polygons[j])): con_ind = all_found_textline_polygons[j][ij] area = cv2.contourArea(con_ind) - + con_ind = con_ind.astype(np.float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - + + x_differential = np.diff(con_ind[:, 0, 0]) + y_differential = np.diff(con_ind[:, 0, 1]) + x_differential = gaussian_filter1d(x_differential, 3) y_differential = gaussian_filter1d(y_differential, 3) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] - y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] - - abs_diff=abs(abs(x_differential)- abs(y_differential) ) - - inc_x = np.zeros(len(x_differential)+1) - inc_y = np.zeros(len(x_differential)+1) - - if (y_max-y_min) <= (x_max-x_min): - dilation_m1 = round(area / (x_max-x_min) * 0.35) + + x_min = float(np.min(con_ind[:, 0, 0])) + y_min = float(np.min(con_ind[:, 0, 1])) + + x_max = float(np.max(con_ind[:, 0, 0])) + y_max = float(np.max(con_ind[:, 0, 1])) + + x_differential_mask_nonzeros = [ind / abs(ind) if ind != 0 else ind for ind in x_differential] + y_differential_mask_nonzeros = [ind / abs(ind) if ind != 0 else ind for ind in y_differential] + + abs_diff = abs(abs(x_differential) - abs(y_differential)) + + inc_x = np.zeros(len(x_differential) + 1) + inc_y = np.zeros(len(x_differential) + 1) + + if (y_max - y_min) <= (x_max - x_min): + dilation_m1 = round(area / (x_max - x_min) * 0.35) else: - dilation_m1 = round(area / (y_max-y_min) * 0.35) - - if dilation_m1>12: + dilation_m1 = round(area / (y_max - y_min) * 0.35) + + if dilation_m1 > 12: dilation_m1 = 12 - if dilation_m1<4: + if dilation_m1 < 4: dilation_m1 = 4 #print(dilation_m1, 'dilation_m1') - dilation_m2 = int(dilation_m1/2.) +1 - + dilation_m2 = int(dilation_m1 / 2.) + 1 + for i in range(len(x_differential)): - if abs_diff[i]==0: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - - elif abs_diff[i]!=0 and abs_diff[i]>=3: - if abs(x_differential[i])>abs(y_differential[i]): - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) + if abs_diff[i] == 0: + inc_x[i + 1] = dilation_m2 * (-1 * y_differential_mask_nonzeros[i]) + inc_y[i + 1] = dilation_m2 * (x_differential_mask_nonzeros[i]) + elif abs_diff[i] != 0 and x_differential_mask_nonzeros[i] == 0 and y_differential_mask_nonzeros[ + i] != 0: + inc_x[i + 1] = dilation_m1 * (-1 * y_differential_mask_nonzeros[i]) + elif abs_diff[i] != 0 and x_differential_mask_nonzeros[i] != 0 and y_differential_mask_nonzeros[ + i] == 0: + inc_y[i + 1] = dilation_m1 * (x_differential_mask_nonzeros[i]) + + elif abs_diff[i] != 0 and abs_diff[i] >= 3: + if abs(x_differential[i]) > abs(y_differential[i]): + inc_y[i + 1] = dilation_m1 * (x_differential_mask_nonzeros[i]) else: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) + inc_x[i + 1] = dilation_m1 * (-1 * y_differential_mask_nonzeros[i]) else: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - + inc_x[i + 1] = dilation_m2 * (-1 * y_differential_mask_nonzeros[i]) + inc_y[i + 1] = dilation_m2 * (x_differential_mask_nonzeros[i]) + inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] - - con_scaled = con_ind*1 - - con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] - con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - + + con_scaled = con_ind * 1 + + con_scaled[:, 0, 0] = con_ind[:, 0, 0] + np.array(inc_x)[:] + con_scaled[:, 0, 1] = con_ind[:, 0, 1] + np.array(inc_y)[:] + + con_scaled[:, 0, 1][con_scaled[:, 0, 1] < 0] = 0 + con_scaled[:, 0, 0][con_scaled[:, 0, 0] < 0] = 0 + con_ind = con_ind.astype(np.int32) - - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) - for ind in range(len(con_scaled[:,0, 1])) ] + + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind, 0, 0], con_scaled[ind, 0, 1]), False) + for ind in range(len(con_scaled[:, 0, 1]))] results = np.array(results) - results[results==0] = 1 - + results[results == 0] = 1 + diff_result = np.diff(results) - - indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] - indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - - if results[0]==1: - con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] - con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] + + indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind] == 2] + indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind] == -2] + + if results[0] == 1: + con_scaled[:indices_m2[0] + 1, 0, 1] = con_ind[:indices_m2[0] + 1, 0, 1] + con_scaled[:indices_m2[0] + 1, 0, 0] = con_ind[:indices_m2[0] + 1, 0, 0] indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): - con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] - con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] + + if len(indices_2) > len(indices_m2): + con_scaled[indices_2[-1] + 1:, 0, 1] = con_ind[indices_2[-1] + 1:, 0, 1] + con_scaled[indices_2[-1] + 1:, 0, 0] = con_ind[indices_2[-1] + 1:, 0, 0] indices_2 = indices_2[:-1] - + for ii in range(len(indices_2)): - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - - all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] + con_scaled[indices_2[ii] + 1:indices_m2[ii] + 1, 0, 1] = con_scaled[indices_2[ii], 0, 1] + con_scaled[indices_2[ii] + 1:indices_m2[ii] + 1, 0, 0] = con_scaled[indices_2[ii], 0, 0] + + all_found_textline_polygons[j][ij][:, 0, 1] = con_scaled[:, 0, 1] + all_found_textline_polygons[j][ij][:, 0, 0] = con_scaled[:, 0, 0] return all_found_textline_polygons - - def filter_contours_inside_a_bigger_one(self,contours, image, marginal_cnts=None, type_contour="textregion"): - if type_contour=="textregion": + + def filter_contours_inside_a_bigger_one(self, contours, image, marginal_cnts=None, type_contour="textregion"): + if type_contour == "textregion": areas = [cv2.contourArea(contours[j]) for j in range(len(contours))] - area_tot = image.shape[0]*image.shape[1] - + area_tot = image.shape[0] * image.shape[1] + M_main = [cv2.moments(contours[j]) for j in range(len(contours))] cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - - areas_ratio = np.array(areas)/ area_tot + + areas_ratio = np.array(areas) / area_tot contours_index_small = [ind for ind in range(len(contours)) if areas_ratio[ind] < 1e-3] - contours_index_big = [ind for ind in range(len(contours)) if areas_ratio[ind] >= 1e-3] - + contours_index_big = [ind for ind in range(len(contours)) if areas_ratio[ind] >= 1e-3] + #contours_> = [contours[ind] for ind in contours_index_big] indexes_to_be_removed = [] for ind_small in contours_index_small: results = [cv2.pointPolygonTest(contours[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in contours_index_big] if marginal_cnts: - results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], (cx_main[ind_small], cy_main[ind_small]), False) - for ind in range(len(marginal_cnts))] + results_marginal = [ + cv2.pointPolygonTest(marginal_cnts[ind], (cx_main[ind_small], cy_main[ind_small]), False) + for ind in range(len(marginal_cnts))] results_marginal = np.array(results_marginal) - - if np.any(results_marginal==1): + + if np.any(results_marginal == 1): indexes_to_be_removed.append(ind_small) - + results = np.array(results) - - if np.any(results==1): + + if np.any(results == 1): indexes_to_be_removed.append(ind_small) - - if len(indexes_to_be_removed)>0: + + if len(indexes_to_be_removed) > 0: indexes_to_be_removed = np.unique(indexes_to_be_removed) indexes_to_be_removed = np.sort(indexes_to_be_removed)[::-1] for ind in indexes_to_be_removed: @@ -4063,41 +4106,42 @@ class Eynollah: contours_txtline_of_all_textregions = [] indexes_of_textline_tot = [] index_textline_inside_textregion = [] - + for jj in range(len(contours)): contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours[jj] - + ind_textline_inside_tr = list(range(len(contours[jj]))) index_textline_inside_textregion = index_textline_inside_textregion + ind_textline_inside_tr #ind_ins = [0] * len(contours[jj]) + jj - ind_ins = np.zeros( len(contours[jj]) ) + jj + ind_ins = np.zeros(len(contours[jj])) + jj list_ind_ins = list(ind_ins) indexes_of_textline_tot = indexes_of_textline_tot + list_ind_ins - + M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))] cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - + areas_tot = [cv2.contourArea(con_ind) for con_ind in contours_txtline_of_all_textregions] - area_tot_tot = image.shape[0]*image.shape[1] - + area_tot_tot = image.shape[0] * image.shape[1] + textregion_index_to_del = [] textline_in_textregion_index_to_del = [] for ij in range(len(contours_txtline_of_all_textregions)): args_all = list(np.array(range(len(contours_txtline_of_all_textregions)))) args_all.pop(ij) - + areas_without = np.array(areas_tot)[args_all] area_of_con_interest = areas_tot[ij] - - args_with_bigger_area = np.array(args_all)[areas_without > 1.5*area_of_con_interest] - - if len(args_with_bigger_area)>0: - results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False) - for ind in args_with_bigger_area ] + + args_with_bigger_area = np.array(args_all)[areas_without > 1.5 * area_of_con_interest] + + if len(args_with_bigger_area) > 0: + results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], + (cx_main_tot[ij], cy_main_tot[ij]), False) + for ind in args_with_bigger_area] results = np.array(results) - if np.any(results==1): + if np.any(results == 1): #print(indexes_of_textline_tot[ij], index_textline_inside_textregion[ij]) textregion_index_to_del.append(int(indexes_of_textline_tot[ij])) textline_in_textregion_index_to_del.append(int(index_textline_inside_textregion[ij])) @@ -4106,171 +4150,173 @@ class Eynollah: textregion_index_to_del = np.array(textregion_index_to_del) textline_in_textregion_index_to_del = np.array(textline_in_textregion_index_to_del) for ind_u_a_trs in np.unique(textregion_index_to_del): - textline_in_textregion_index_to_del_ind = textline_in_textregion_index_to_del[textregion_index_to_del==ind_u_a_trs] + textline_in_textregion_index_to_del_ind = textline_in_textregion_index_to_del[ + textregion_index_to_del == ind_u_a_trs] textline_in_textregion_index_to_del_ind = np.sort(textline_in_textregion_index_to_del_ind)[::-1] for ittrd in textline_in_textregion_index_to_del_ind: contours[ind_u_a_trs].pop(ittrd) - + return contours - + def filter_contours_without_textline_inside( - self, contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered): - + self, contours, text_con_org, contours_textline, contours_only_text_parent_d_ordered): + ###contours_txtline_of_all_textregions = [] ###for jj in range(len(contours_textline)): - ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj] - + ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj] + ###M_main_textline = [cv2.moments(contours_txtline_of_all_textregions[j]) ### for j in range(len(contours_txtline_of_all_textregions))] ###cx_main_textline = [(M_main_textline[j]["m10"] / (M_main_textline[j]["m00"] + 1e-32)) ### for j in range(len(M_main_textline))] ###cy_main_textline = [(M_main_textline[j]["m01"] / (M_main_textline[j]["m00"] + 1e-32)) ### for j in range(len(M_main_textline))] - + ###M_main = [cv2.moments(contours[j]) for j in range(len(contours))] ###cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] ###cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - + ###contours_with_textline = [] ###for ind_tr, con_tr in enumerate(contours): - ###results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False) + ###results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False) ### for index_textline_con in range(len(contours_txtline_of_all_textregions)) ] - ###results = np.array(results) - ###if np.any(results==1): - ###contours_with_textline.append(con_tr) - + ###results = np.array(results) + ###if np.any(results==1): + ###contours_with_textline.append(con_tr) + textregion_index_to_del = [] for index_textregion, textlines_textregion in enumerate(contours_textline): - if len(textlines_textregion)==0: + if len(textlines_textregion) == 0: textregion_index_to_del.append(index_textregion) uniqe_args_trs = np.unique(textregion_index_to_del) uniqe_args_trs_sorted = np.sort(uniqe_args_trs)[::-1] - + for ind_u_a_trs in uniqe_args_trs_sorted: contours.pop(ind_u_a_trs) contours_textline.pop(ind_u_a_trs) text_con_org.pop(ind_u_a_trs) if len(contours_only_text_parent_d_ordered) > 0: contours_only_text_parent_d_ordered.pop(ind_u_a_trs) - - return contours, text_con_org, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours))) - + + return contours, text_con_org, contours_textline, contours_only_text_parent_d_ordered, np.array( + range(len(contours))) + def dilate_textlines(self, all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): for i in range(len(all_found_textline_polygons[j])): con_ind = all_found_textline_polygons[j][i] con_ind = con_ind.astype(np.float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: + x_differential = np.diff(con_ind[:, 0, 0]) + y_differential = np.diff(con_ind[:, 0, 1]) + + x_min = float(np.min(con_ind[:, 0, 0])) + y_min = float(np.min(con_ind[:, 0, 1])) + + x_max = float(np.max(con_ind[:, 0, 0])) + y_max = float(np.max(con_ind[:, 0, 1])) + + if (y_max - y_min) > (x_max - x_min) and (x_max - x_min) < 70: x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) - mult = x_biger_than_x*x_differential - + mult = x_biger_than_x * x_differential + arg_min_mult = np.argmin(mult) arg_max_mult = np.argmax(mult) - - if y_differential[0]==0: + + if y_differential[0] == 0: y_differential[0] = 0.1 - if y_differential[-1]==0: - y_differential[-1]= 0.1 + if y_differential[-1] == 0: + y_differential[-1] = 0.1 y_differential = [y_differential[ind] if y_differential[ind] != 0 - else 0.5 * (y_differential[ind-1] + y_differential[ind+1]) + else 0.5 * (y_differential[ind - 1] + y_differential[ind + 1]) for ind in range(len(y_differential))] - - if y_differential[0]==0.1: + + if y_differential[0] == 0.1: y_differential[0] = y_differential[1] - if y_differential[-1]==0.1: + if y_differential[-1] == 0.1: y_differential[-1] = y_differential[-2] y_differential.append(y_differential[0]) - + y_differential = [-1 if y_differential[ind] < 0 else 1 for ind in range(len(y_differential))] y_differential = self.return_it_in_two_groups(y_differential) y_differential = np.array(y_differential) - - con_scaled = con_ind*1 - con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential - con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 - con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 - + + con_scaled = con_ind * 1 + con_scaled[:, 0, 0] = con_ind[:, 0, 0] - 8 * y_differential + con_scaled[arg_min_mult, 0, 1] = con_ind[arg_min_mult, 0, 1] + 8 + con_scaled[arg_min_mult + 1, 0, 1] = con_ind[arg_min_mult + 1, 0, 1] + 8 + try: - con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5 - con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5 + con_scaled[arg_min_mult - 1, 0, 1] = con_ind[arg_min_mult - 1, 0, 1] + 5 + con_scaled[arg_min_mult + 2, 0, 1] = con_ind[arg_min_mult + 2, 0, 1] + 5 except: pass - - con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8 - con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8 - + + con_scaled[arg_max_mult, 0, 1] = con_ind[arg_max_mult, 0, 1] - 8 + con_scaled[arg_max_mult + 1, 0, 1] = con_ind[arg_max_mult + 1, 0, 1] - 8 + try: - con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5 - con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5 + con_scaled[arg_max_mult - 1, 0, 1] = con_ind[arg_max_mult - 1, 0, 1] - 5 + con_scaled[arg_max_mult + 2, 0, 1] = con_ind[arg_max_mult + 2, 0, 1] - 5 except: pass - + else: y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) - mult = y_biger_than_x*y_differential - + mult = y_biger_than_x * y_differential + arg_min_mult = np.argmin(mult) arg_max_mult = np.argmax(mult) - - if x_differential[0]==0: + + if x_differential[0] == 0: x_differential[0] = 0.1 - if x_differential[-1]==0: - x_differential[-1]= 0.1 + if x_differential[-1] == 0: + x_differential[-1] = 0.1 x_differential = [x_differential[ind] if x_differential[ind] != 0 - else 0.5 * (x_differential[ind-1] + x_differential[ind+1]) + else 0.5 * (x_differential[ind - 1] + x_differential[ind + 1]) for ind in range(len(x_differential))] - - if x_differential[0]==0.1: + + if x_differential[0] == 0.1: x_differential[0] = x_differential[1] - if x_differential[-1]==0.1: + if x_differential[-1] == 0.1: x_differential[-1] = x_differential[-2] x_differential.append(x_differential[0]) - + x_differential = [-1 if x_differential[ind] < 0 else 1 for ind in range(len(x_differential))] x_differential = self.return_it_in_two_groups(x_differential) x_differential = np.array(x_differential) - - con_scaled = con_ind*1 - con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential - con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 - con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 - + + con_scaled = con_ind * 1 + con_scaled[:, 0, 1] = con_ind[:, 0, 1] + 8 * x_differential + con_scaled[arg_min_mult, 0, 0] = con_ind[arg_min_mult, 0, 0] + 8 + con_scaled[arg_min_mult + 1, 0, 0] = con_ind[arg_min_mult + 1, 0, 0] + 8 + try: - con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5 - con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5 + con_scaled[arg_min_mult - 1, 0, 0] = con_ind[arg_min_mult - 1, 0, 0] + 5 + con_scaled[arg_min_mult + 2, 0, 0] = con_ind[arg_min_mult + 2, 0, 0] + 5 except: pass - - con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8 - con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8 - + + con_scaled[arg_max_mult, 0, 0] = con_ind[arg_max_mult, 0, 0] - 8 + con_scaled[arg_max_mult + 1, 0, 0] = con_ind[arg_max_mult + 1, 0, 0] - 8 + try: - con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5 - con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 + con_scaled[arg_max_mult - 1, 0, 0] = con_ind[arg_max_mult - 1, 0, 0] - 5 + con_scaled[arg_max_mult + 2, 0, 0] = con_ind[arg_max_mult + 2, 0, 0] - 5 except: pass - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - all_found_textline_polygons[j][i][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0] + con_scaled[:, 0, 1][con_scaled[:, 0, 1] < 0] = 0 + con_scaled[:, 0, 0][con_scaled[:, 0, 0] < 0] = 0 + + all_found_textline_polygons[j][i][:, 0, 1] = con_scaled[:, 0, 1] + all_found_textline_polygons[j][i][:, 0, 0] = con_scaled[:, 0, 0] return all_found_textline_polygons - + def delete_regions_without_textlines( self, slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con): @@ -4281,7 +4327,7 @@ class Eynollah: txt_con_org_rem = [] contours_only_text_parent_rem = [] index_by_text_par_con_rem = [] - + for i, ind_con in enumerate(all_found_textline_polygons): if len(ind_con): all_found_textline_polygons_rem.append(ind_con) @@ -4290,13 +4336,13 @@ class Eynollah: txt_con_org_rem.append(txt_con_org[i]) contours_only_text_parent_rem.append(contours_only_text_parent[i]) index_by_text_par_con_rem.append(index_by_text_par_con[i]) - + index_sort = np.argsort(index_by_text_par_con_rem) indexes_new = np.array(range(len(index_by_text_par_con_rem))) - - index_by_text_par_con_rem_sort = [indexes_new[index_sort==j][0] + + index_by_text_par_con_rem_sort = [indexes_new[index_sort == j][0] for j in range(len(index_by_text_par_con_rem))] - + return (slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem, contours_only_text_parent_rem, index_by_text_par_con_rem_sort) @@ -4310,12 +4356,12 @@ class Eynollah: if not self.dir_in: self.ls_imgs = [1] - + for img_name in self.ls_imgs: self.logger.info(img_name) t0 = time.time() if self.dir_in: - self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) + self.reset_file_name_dir(os.path.join(self.dir_in, img_name)) #print("text region early -11 in %.1fs", time.time() - t0) if os.path.exists(self.writer.output_filename): @@ -4324,8 +4370,9 @@ class Eynollah: else: self.logger.warning("will skip input for existing output file '%s'", self.writer.output_filename) continue - - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) + + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement( + self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) if self.extract_only_images: text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \ @@ -4345,31 +4392,29 @@ class Eynollah: return pcgts if self.skip_layout_and_reading_order: - _ ,_, _, textline_mask_tot_ea, img_bin_light = \ + _, _, _, textline_mask_tot_ea, img_bin_light = \ self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = \ self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) - ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) all_found_textline_polygons = filter_contours_area_of_image( textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - all_found_textline_polygons=[ all_found_textline_polygons ] + all_found_textline_polygons = [all_found_textline_polygons] all_found_textline_polygons = self.dilate_textregions_contours_textline_version( all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") - order_text_new = [0] - slopes =[0] - id_of_texts_tot =['region_0001'] + slopes = [0] + id_of_texts_tot = ['region_0001'] polygons_of_images = [] slopes_marginals = [] @@ -4393,18 +4438,18 @@ class Eynollah: #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = \ + text_regions_p_1, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = \ self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) #print("text region early -2 in %.1fs", time.time() - t0) - if num_col_classifier == 1 or num_col_classifier ==2: + if num_col_classifier == 1 or num_col_classifier == 2: if num_col_classifier == 1: img_w_new = 1000 else: img_w_new = 1300 img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1] - textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea, img_h_new, img_w_new) slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) else: @@ -4413,14 +4458,15 @@ class Eynollah: #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, - num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, + num_col_classifier, num_column_is_classified, erosion_hurts, + img_bin_light) #self.logger.info("run graphics %.1fs ", time.time() - t1t) #print("text region early -3 in %.1fs", time.time() - t0) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) #print("text region early -4 in %.1fs", time.time() - t0) else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = \ + text_regions_p_1, erosion_hurts, polygons_lines_xml = \ self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) @@ -4428,7 +4474,8 @@ class Eynollah: t1 = time.time() num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, + erosion_hurts) self.logger.info("Graphics detection took %.1fs ", time.time() - t1) #self.logger.info('cont_page %s', cont_page) #plt.imshow(table_prediction) @@ -4455,7 +4502,7 @@ class Eynollah: t1 = time.time() slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) self.logger.info("deskewing took %.1fs", time.time() - t1) - elif num_col_classifier in (1,2): + elif num_col_classifier in (1, 2): org_h_l_m = textline_mask_tot_ea.shape[0] org_w_l_m = textline_mask_tot_ea.shape[1] if num_col_classifier == 1: @@ -4464,25 +4511,25 @@ class Eynollah: img_w_new = 2400 img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1] - image_page = resize_image(image_page,img_h_new, img_w_new ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - mask_images = resize_image(mask_images,img_h_new, img_w_new ) - mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) - text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) - table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) + image_page = resize_image(image_page, img_h_new, img_w_new) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea, img_h_new, img_w_new) + mask_images = resize_image(mask_images, img_h_new, img_w_new) + mask_lines = resize_image(mask_lines, img_h_new, img_w_new) + text_regions_p_1 = resize_image(text_regions_p_1, img_h_new, img_w_new) + table_prediction = resize_image(table_prediction, img_h_new, img_w_new) textline_mask_tot, text_regions_p, image_page_rotated = \ self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - if self.light_version and num_col_classifier in (1,2): - image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) - text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) - textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) - text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) - table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) - image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) + if self.light_version and num_col_classifier in (1, 2): + image_page = resize_image(image_page, org_h_l_m, org_w_l_m) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea, org_h_l_m, org_w_l_m) + text_regions_p = resize_image(text_regions_p, org_h_l_m, org_w_l_m) + textline_mask_tot = resize_image(textline_mask_tot, org_h_l_m, org_w_l_m) + text_regions_p_1 = resize_image(text_regions_p_1, org_h_l_m, org_w_l_m) + table_prediction = resize_image(table_prediction, org_h_l_m, org_w_l_m) + image_page_rotated = resize_image(image_page_rotated, org_h_l_m, org_w_l_m) self.logger.info("detection of marginals took %.1fs", time.time() - t1) #print("text region early 2 marginal in %.1fs", time.time() - t0) @@ -4503,8 +4550,7 @@ class Eynollah: ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) if self.light_version: drop_label_in_full_layout = 4 - textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 - + textline_mask_tot_ea_org[img_revised_tab == drop_label_in_full_layout] = 0 text_only = (img_revised_tab[:, :] == 1) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: @@ -4528,11 +4574,11 @@ class Eynollah: contours_only_text_parent, index_con_parents) ##try: - ##contours_only_text_parent = \ - ##list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##contours_only_text_parent = \ + ##list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) ##except: - ##contours_only_text_parent = \ - ##list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##contours_only_text_parent = \ + ##list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) areas_cnt_text_parent = self.return_list_of_contours_with_desired_order( areas_cnt_text_parent, index_con_parents) @@ -4547,23 +4593,25 @@ class Eynollah: areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) - if len(areas_cnt_text_d)>0: + if len(areas_cnt_text_d) > 0: contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] index_con_parents_d = np.argsort(areas_cnt_text_d) contours_only_text_parent_d = self.return_list_of_contours_with_desired_order( contours_only_text_parent_d, index_con_parents_d) #try: - #contours_only_text_parent_d = \ - #list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + #contours_only_text_parent_d = \ + #list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) #except: - #contours_only_text_parent_d = \ - #list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) + #contours_only_text_parent_d = \ + #list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) areas_cnt_text_d = self.return_list_of_contours_with_desired_order( areas_cnt_text_d, index_con_parents_d) - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) + cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours( + [contours_biggest_d]) + cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours( + contours_only_text_parent_d) try: if len(cx_bigest_d) >= 5: cx_bigest_d_last5 = cx_bigest_d[-5:] @@ -4571,12 +4619,12 @@ class Eynollah: dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) + ind_largest = len(cx_bigest_d) - 5 + np.argmin(dists_d) else: cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + - (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) @@ -4661,12 +4709,12 @@ class Eynollah: if self.textline_light: all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light2( - txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, - image_page_rotated, boxes_text, slope_deskew) + txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, + image_page_rotated, boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light2( - polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, - image_page_rotated, boxes_marginals, slope_deskew) + polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, + image_page_rotated, boxes_marginals, slope_deskew) #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, @@ -4684,44 +4732,45 @@ class Eynollah: all_found_textline_polygons_marginals) contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, \ index_by_text_par_con = self.filter_contours_without_textline_inside( - contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered) + contours_only_text_parent, txt_con_org, all_found_textline_polygons, + contours_only_text_parent_d_ordered) else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \ index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light( - txt_con_org, contours_only_text_parent, textline_mask_tot_ea, - image_page_rotated, boxes_text, slope_deskew) + txt_con_org, contours_only_text_parent, textline_mask_tot_ea, + image_page_rotated, boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light( - polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, - image_page_rotated, boxes_marginals, slope_deskew) + polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, + image_page_rotated, boxes_marginals, slope_deskew) #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( # all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new( - txt_con_org, contours_only_text_parent, textline_mask_tot_ea, - image_page_rotated, boxes_text, slope_deskew) + txt_con_org, contours_only_text_parent, textline_mask_tot_ea, + image_page_rotated, boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new( - polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, - image_page_rotated, boxes_marginals, slope_deskew) + polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, + image_page_rotated, boxes_marginals, slope_deskew) else: scale_param = 1 textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved( - txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, - image_page_rotated, boxes_text, text_only, - num_col_classifier, scale_param, slope_deskew) + txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, + image_page_rotated, boxes_text, text_only, + num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons = small_textlines_to_parent_adherence2( all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved( - polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, - image_page_rotated, boxes_marginals, text_only, - num_col_classifier, scale_param, slope_deskew) + polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, + image_page_rotated, boxes_marginals, text_only, + num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2( all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) @@ -4731,11 +4780,11 @@ class Eynollah: contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order( contours_only_text_parent_d_ordered, index_by_text_par_con) #try: - #contours_only_text_parent_d_ordered = \ - #list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #contours_only_text_parent_d_ordered = \ + #list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) #except: - #contours_only_text_parent_d_ordered = \ - #list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #contours_only_text_parent_d_ordered = \ + #list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) else: #takes long timee contours_only_text_parent_d_ordered = None @@ -4746,8 +4795,8 @@ class Eynollah: text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \ all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \ contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = fun( - text_regions_p, regions_fully, contours_only_text_parent, - all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + text_regions_p, regions_fully, contours_only_text_parent, + all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) if self.plotter: self.plotter.save_plot_of_layout(text_regions_p, image_page) @@ -4766,7 +4815,7 @@ class Eynollah: if np.abs(slope_deskew) < SLOPE_THRESHOLD: num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h) + num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h) else: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), @@ -4775,7 +4824,7 @@ class Eynollah: if np.abs(slope_deskew) < SLOPE_THRESHOLD: num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_seps) + num_col_classifier, self.tables, pixel_seps) else: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), @@ -4784,10 +4833,12 @@ class Eynollah: if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, + iterations=6) else: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, + iterations=6) if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( @@ -4796,7 +4847,7 @@ class Eynollah: else: boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new( splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, - num_col_classifier, erosion_hurts, self.tables, self.right2left) + num_col_classifier, erosion_hurts, self.tables, self.right2left) if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) @@ -4812,7 +4863,8 @@ class Eynollah: contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: order_text_new, id_of_texts_tot = self.do_order_of_regions( - contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, + textline_mask_tot_d) self.logger.info("detection of reading order took %.1fs", time.time() - t_order) if self.ocr: @@ -4846,13 +4898,14 @@ class Eynollah: contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order( contours_only_text_parent_d_ordered, index_by_text_par_con) #try: - #contours_only_text_parent_d_ordered = \ - #list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #contours_only_text_parent_d_ordered = \ + #list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) #except: - #contours_only_text_parent_d_ordered = \ - #list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #contours_only_text_parent_d_ordered = \ + #list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions( - contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, + textline_mask_tot_d) if self.ocr: device = cuda.get_current_device() @@ -4877,11 +4930,11 @@ class Eynollah: #print(box_ind) ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) #print(ind_poly_copy) - ind_poly[ind_poly<0] = 0 + ind_poly[ind_poly < 0] = 0 x, y, w, h = cv2.boundingRect(ind_poly) #print(ind_poly_copy, np.shape(ind_poly_copy)) #print(x, y, w, h, h/float(w),'ratio') - h2w_ratio = h/float(w) + h2w_ratio = h / float(w) mask_poly = np.zeros(image_page.shape) if not self.light_version: img_poly_on_img = np.copy(image_page) @@ -4891,15 +4944,17 @@ class Eynollah: if self.textline_light: mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) - img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:, :, 0][mask_poly[:, :, 0] == 0] = 255 + img_poly_on_img[:, :, 1][mask_poly[:, :, 0] == 0] = 255 + img_poly_on_img[:, :, 2][mask_poly[:, :, 0] == 0] = 255 - img_croped = img_poly_on_img[y:y+h, x:x+w, :] + img_croped = img_poly_on_img[y:y + h, x:x + w, :] #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) - text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, + processor, device, w, + h2w_ratio, ind_tot) ocr_textline_in_textregion.append(text_ocr) - ind_tot = ind_tot +1 + ind_tot = ind_tot + 1 ocr_all_textlines.append(ocr_textline_in_textregion) else: @@ -4921,23 +4976,23 @@ class Eynollah: self.writer.write_pagexml(pcgts) self.logger.info("Job done in %.1fs", time.time() - t0) #print("Job done in %.1fs" % (time.time() - t0)) - + if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) print("all Job done in %.1fs", time.time() - t0_tot) - - + + class Eynollah_ocr: def __init__( - self, - dir_models, - dir_xmls=None, - dir_in=None, - dir_out=None, - tr_ocr=False, - export_textline_images_and_text=False, - do_not_mask_with_textline_contour=False, - logger=None, + self, + dir_models, + dir_xmls=None, + dir_in=None, + dir_out=None, + tr_ocr=False, + export_textline_images_and_text=False, + do_not_mask_with_textline_contour=False, + logger=None, ): self.dir_in = dir_in self.dir_out = dir_out @@ -4954,18 +5009,16 @@ class Eynollah_ocr: self.model_ocr.to(self.device) else: - self.model_ocr_dir = dir_models + "/model_3_new_ocrcnn"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" - model_ocr = load_model(self.model_ocr_dir , compile=False) - + self.model_ocr_dir = dir_models + "/model_3_new_ocrcnn" #"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + model_ocr = load_model(self.model_ocr_dir, compile=False) + self.prediction_model = tf.keras.models.Model( - model_ocr.get_layer(name = "image").input, - model_ocr.get_layer(name = "dense2").output) + model_ocr.get_layer(name="image").input, + model_ocr.get_layer(name="dense2").output) - - with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: + with open(os.path.join(self.model_ocr_dir, "characters_org.txt"), "r") as config_file: characters = json.load(config_file) - AUTOTUNE = tf.data.AUTOTUNE # Mapping characters to integers. @@ -4975,23 +5028,23 @@ class Eynollah_ocr: self.num_to_char = StringLookup( vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True ) - - def decode_batch_predictions(self, pred, max_len = 128): + + def decode_batch_predictions(self, pred, max_len=128): # input_len is the product of the batch size and the # number of time steps. input_len = np.ones(pred.shape[0]) * pred.shape[1] - + # Decode CTC predictions using greedy search. # decoded is a tuple with 2 elements. - decoded = tf.keras.backend.ctc_decode(pred, - input_length = input_len, - beam_width = 100) + decoded = tf.keras.backend.ctc_decode(pred, + input_length=input_len, + beam_width=100) # The outputs are in the first element of the tuple. # Additionally, the first element is actually a list, # therefore we take the first element of that list as well. #print(decoded,'decoded') decoded = decoded[0][0][:, :max_len] - + #print(decoded, decoded.shape,'decoded') output = [] @@ -5001,8 +5054,7 @@ class Eynollah_ocr: d = d.numpy().decode("utf-8") output.append(d) return output - - + def distortion_free_resize(self, image, img_size): w, h = img_size image = tf.image.resize(image, size=(h, w), preserve_aspect_ratio=True) @@ -5038,80 +5090,79 @@ class Eynollah_ocr: image = tf.transpose(image, (1, 0, 2)) image = tf.image.flip_left_right(image) return image - + def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image): width = np.shape(textline_image)[1] height = np.shape(textline_image)[0] - common_window = int(0.06*width) + common_window = int(0.06 * width) + + width1 = int(width / 2. - common_window) + width2 = int(width / 2. + common_window) - width1 = int ( width/2. - common_window ) - width2 = int ( width/2. + common_window ) - - img_sum = np.sum(textline_image[:,:,0], axis=0) + img_sum = np.sum(textline_image[:, :, 0], axis=0) sum_smoothed = gaussian_filter1d(img_sum, 3) - + peaks_real, _ = find_peaks(sum_smoothed, height=0) - - if len(peaks_real)>70: - peaks_real = peaks_real[(peaks_realwidth1)] + if len(peaks_real) > 70: + + peaks_real = peaks_real[(peaks_real < width2) & (peaks_real > width1)] arg_max = np.argmax(sum_smoothed[peaks_real]) peaks_final = peaks_real[arg_max] - + return peaks_final else: return None - + def return_textlines_split_if_needed(self, textline_image): split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image) if split_point: - image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) - image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) + image1 = textline_image[:, :split_point, :] # image.crop((0, 0, width2, height)) + image2 = textline_image[:, split_point:, :] #image.crop((width1, 0, width, height)) return [image1, image2] else: return None + def preprocess_and_resize_image_for_ocrcnn_model(self, img, image_height, image_width): - ratio = image_height /float(img.shape[0]) + ratio = image_height / float(img.shape[0]) w_ratio = int(ratio * img.shape[1]) if w_ratio <= image_width: width_new = w_ratio else: width_new = image_width img = resize_image(img, image_height, width_new) - img_fin = np.ones((image_height, image_width, 3))*255 - img_fin[:,:width_new,:] = img[:,:,:] + img_fin = np.ones((image_height, image_width, 3)) * 255 + img_fin[:, :width_new, :] = img[:, :, :] img_fin = img_fin / 255. return img_fin - + def run(self): ls_imgs = os.listdir(self.dir_in) - + if self.tr_ocr: b_s = 2 for ind_img in ls_imgs: t0 = time.time() file_name = ind_img.split('.')[0] dir_img = os.path.join(self.dir_in, ind_img) - dir_xml = os.path.join(self.dir_xmls, file_name+'.xml') - out_file_ocr = os.path.join(self.dir_out, file_name+'.xml') + dir_xml = os.path.join(self.dir_xmls, file_name + '.xml') + out_file_ocr = os.path.join(self.dir_out, file_name + '.xml') img = cv2.imread(dir_img) ##file_name = Path(dir_xmls).stem - tree1 = ET.parse(dir_xml, parser = ET.XMLParser(encoding="utf-8")) - root1=tree1.getroot() - alltags=[elem.tag for elem in root1.iter()] - link=alltags[0].split('}')[0]+'}' + tree1 = ET.parse(dir_xml, parser=ET.XMLParser(encoding="utf-8")) + root1 = tree1.getroot() + alltags = [elem.tag for elem in root1.iter()] + link = alltags[0].split('}')[0] + '}' name_space = alltags[0].split('}')[0] name_space = name_space.split('{')[1] - region_tags=np.unique([x for x in alltags if x.endswith('TextRegion')]) - - - + region_tags = np.unique([x for x in alltags if x.endswith('TextRegion')]) + cropped_lines = [] cropped_lines_region_indexer = [] cropped_lines_meging_indexing = [] @@ -5120,24 +5171,25 @@ class Eynollah_ocr: for nn in root1.iter(region_tags): for child_textregion in nn: if child_textregion.tag.endswith("TextLine"): - + for child_textlines in child_textregion: if child_textlines.tag.endswith("Coords"): cropped_lines_region_indexer.append(indexer_text_region) - p_h=child_textlines.attrib['points'].split(' ') - textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) - x,y,w,h = cv2.boundingRect(textline_coords) - - h2w_ratio = h/float(w) - + p_h = child_textlines.attrib['points'].split(' ') + textline_coords = np.array( + [[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h]) + x, y, w, h = cv2.boundingRect(textline_coords) + + h2w_ratio = h / float(w) + img_poly_on_img = np.copy(img) mask_poly = np.zeros(img.shape) mask_poly = cv2.fillPoly(mask_poly, pts=[textline_coords], color=(1, 1, 1)) - - mask_poly = mask_poly[y:y+h, x:x+w, :] - img_crop = img_poly_on_img[y:y+h, x:x+w, :] - img_crop[mask_poly==0] = 255 - + + mask_poly = mask_poly[y:y + h, x:x + w, :] + img_crop = img_poly_on_img[y:y + h, x:x + w, :] + img_crop[mask_poly == 0] = 255 + if h2w_ratio > 0.05: cropped_lines.append(img_crop) cropped_lines_meging_indexing.append(0) @@ -5152,27 +5204,30 @@ class Eynollah_ocr: else: cropped_lines.append(img_crop) cropped_lines_meging_indexing.append(0) - indexer_text_region = indexer_text_region +1 - - + indexer_text_region = indexer_text_region + 1 + extracted_texts = [] - n_iterations = math.ceil(len(cropped_lines) / b_s) + n_iterations = math.ceil(len(cropped_lines) / b_s) for i in range(n_iterations): - if i==(n_iterations-1): - n_start = i*b_s + if i == (n_iterations - 1): + n_start = i * b_s imgs = cropped_lines[n_start:] else: - n_start = i*b_s - n_end = (i+1)*b_s + n_start = i * b_s + n_end = (i + 1) * b_s imgs = cropped_lines[n_start:n_end] pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) - + extracted_texts = extracted_texts + generated_text_merged - extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + extracted_texts_merged = [ + extracted_texts[ind] if cropped_lines_meging_indexing[ind] == 0 else extracted_texts[ind] + + extracted_texts[ind + 1] if + cropped_lines_meging_indexing[ind] == 1 else None for ind in + range(len(cropped_lines_meging_indexing))] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] #print(extracted_texts_merged, len(extracted_texts_merged)) @@ -5182,23 +5237,21 @@ class Eynollah_ocr: #print(len(unique_cropped_lines_region_indexer), 'unique_cropped_lines_region_indexer') text_by_textregion = [] for ind in unique_cropped_lines_region_indexer: - extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] - + extracted_texts_merged_un = np.array(extracted_texts_merged)[ + np.array(cropped_lines_region_indexer) == ind] + text_by_textregion.append(" ".join(extracted_texts_merged_un)) - - #print(len(text_by_textregion) , indexer_text_region, "text_by_textregion") + #print(len(text_by_textregion) , indexer_text_region, "text_by_textregion") #print(time.time() - t0 ,'elapsed time') - indexer = 0 indexer_textregion = 0 for nn in root1.iter(region_tags): text_subelement_textregion = ET.SubElement(nn, 'TextEquiv') unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode') - has_textline = False for child_textregion in nn: if child_textregion.tag.endswith("TextLine"): @@ -5210,44 +5263,41 @@ class Eynollah_ocr: if has_textline: unicode_textregion.text = text_by_textregion[indexer_textregion] indexer_textregion = indexer_textregion + 1 - - - ET.register_namespace("",name_space) - tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None) + ET.register_namespace("", name_space) + tree1.write(out_file_ocr, xml_declaration=True, method='xml', encoding="utf8", default_namespace=None) #print("Job done in %.1fs", time.time() - t0) else: max_len = 512 padding_token = 299 - image_width = 512#max_len * 4 + image_width = 512 #max_len * 4 image_height = 32 b_s = 8 + img_size = (image_width, image_height) - img_size=(image_width, image_height) - for ind_img in ls_imgs: t0 = time.time() file_name = ind_img.split('.')[0] dir_img = os.path.join(self.dir_in, ind_img) - dir_xml = os.path.join(self.dir_xmls, file_name+'.xml') - out_file_ocr = os.path.join(self.dir_out, file_name+'.xml') + dir_xml = os.path.join(self.dir_xmls, file_name + '.xml') + out_file_ocr = os.path.join(self.dir_out, file_name + '.xml') img = cv2.imread(dir_img) - tree1 = ET.parse(dir_xml, parser = ET.XMLParser(encoding="utf-8")) - root1=tree1.getroot() - alltags=[elem.tag for elem in root1.iter()] - link=alltags[0].split('}')[0]+'}' + tree1 = ET.parse(dir_xml, parser=ET.XMLParser(encoding="utf-8")) + root1 = tree1.getroot() + alltags = [elem.tag for elem in root1.iter()] + link = alltags[0].split('}')[0] + '}' name_space = alltags[0].split('}')[0] name_space = name_space.split('{')[1] - region_tags=np.unique([x for x in alltags if x.endswith('TextRegion')]) - + region_tags = np.unique([x for x in alltags if x.endswith('TextRegion')]) + cropped_lines = [] cropped_lines_region_indexer = [] cropped_lines_meging_indexing = [] - + tinl = time.time() indexer_text_region = 0 indexer_textlines = 0 @@ -5257,42 +5307,49 @@ class Eynollah_ocr: for child_textlines in child_textregion: if child_textlines.tag.endswith("Coords"): cropped_lines_region_indexer.append(indexer_text_region) - p_h=child_textlines.attrib['points'].split(' ') - textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) - - x,y,w,h = cv2.boundingRect(textline_coords) - - h2w_ratio = h/float(w) - + p_h = child_textlines.attrib['points'].split(' ') + textline_coords = np.array( + [[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h]) + + x, y, w, h = cv2.boundingRect(textline_coords) + + h2w_ratio = h / float(w) + img_poly_on_img = np.copy(img) mask_poly = np.zeros(img.shape) mask_poly = cv2.fillPoly(mask_poly, pts=[textline_coords], color=(1, 1, 1)) - - mask_poly = mask_poly[y:y+h, x:x+w, :] - img_crop = img_poly_on_img[y:y+h, x:x+w, :] + + mask_poly = mask_poly[y:y + h, x:x + w, :] + img_crop = img_poly_on_img[y:y + h, x:x + w, :] if not self.do_not_mask_with_textline_contour: - img_crop[mask_poly==0] = 255 - + img_crop[mask_poly == 0] = 255 + if not self.export_textline_images_and_text: if h2w_ratio > 0.05: - img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, + image_height, + image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) else: splited_images = self.return_textlines_split_if_needed(img_crop) if splited_images: - img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) + img_fin = self.preprocess_and_resize_image_for_ocrcnn_model( + splited_images[0], image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(1) - img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) - + img_fin = self.preprocess_and_resize_image_for_ocrcnn_model( + splited_images[1], image_height, image_width) + cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(-1) else: - img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, + image_height, + image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) - + if self.export_textline_images_and_text: if child_textlines.tag.endswith("TextEquiv"): for cheild_text in child_textlines: @@ -5301,33 +5358,34 @@ class Eynollah_ocr: if not textline_text: pass else: - with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.txt'), 'w') as text_file: + with open(os.path.join(self.dir_out, file_name + '_line_' + str( + indexer_textlines) + '.txt'), 'w') as text_file: text_file.write(textline_text) - cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.png'), img_crop ) - - indexer_textlines+=1 + cv2.imwrite(os.path.join(self.dir_out, file_name + '_line_' + str( + indexer_textlines) + '.png'), img_crop) + + indexer_textlines += 1 if not self.export_textline_images_and_text: - indexer_text_region = indexer_text_region +1 - + indexer_text_region = indexer_text_region + 1 + if not self.export_textline_images_and_text: extracted_texts = [] - n_iterations = math.ceil(len(cropped_lines) / b_s) + n_iterations = math.ceil(len(cropped_lines) / b_s) for i in range(n_iterations): - if i==(n_iterations-1): - n_start = i*b_s + if i == (n_iterations - 1): + n_start = i * b_s imgs = cropped_lines[n_start:] imgs = np.array(imgs) imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3) else: - n_start = i*b_s - n_end = (i+1)*b_s + n_start = i * b_s + n_end = (i + 1) * b_s imgs = cropped_lines[n_start:n_end] imgs = np.array(imgs).reshape(b_s, image_height, image_width, 3) - preds = self.prediction_model.predict(imgs, verbose=0) pred_texts = self.decode_batch_predictions(preds) @@ -5335,24 +5393,28 @@ class Eynollah_ocr: for ib in range(imgs.shape[0]): pred_texts_ib = pred_texts[ib].strip("[UNK]") extracted_texts.append(pred_texts_ib) - - extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + + extracted_texts_merged = [ + extracted_texts[ind] if cropped_lines_meging_indexing[ind] == 0 else extracted_texts[ind] + + extracted_texts[ind + 1] if + cropped_lines_meging_indexing[ind] == 1 else None for ind in + range(len(cropped_lines_meging_indexing))] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) text_by_textregion = [] for ind in unique_cropped_lines_region_indexer: - extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] + extracted_texts_merged_un = np.array(extracted_texts_merged)[ + np.array(cropped_lines_region_indexer) == ind] text_by_textregion.append(" ".join(extracted_texts_merged_un)) - + indexer = 0 indexer_textregion = 0 for nn in root1.iter(region_tags): text_subelement_textregion = ET.SubElement(nn, 'TextEquiv') unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode') - has_textline = False for child_textregion in nn: if child_textregion.tag.endswith("TextLine"): @@ -5365,6 +5427,7 @@ class Eynollah_ocr: unicode_textregion.text = text_by_textregion[indexer_textregion] indexer_textregion = indexer_textregion + 1 - ET.register_namespace("",name_space) - tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None) + ET.register_namespace("", name_space) + tree1.write(out_file_ocr, xml_declaration=True, method='xml', encoding="utf8", + default_namespace=None) #print("Job done in %.1fs", time.time() - t0) diff --git a/src/eynollah/ocrd_cli_binarization.py b/src/eynollah/ocrd_cli_binarization.py index dfdb7b6..2cec6d2 100644 --- a/src/eynollah/ocrd_cli_binarization.py +++ b/src/eynollah/ocrd_cli_binarization.py @@ -32,7 +32,7 @@ def cv2pil(img): def pil2cv(img): # from ocrd/workspace.py - color_conversion = cv2.COLOR_GRAY2BGR if img.mode in ('1', 'L') else cv2.COLOR_RGB2BGR + color_conversion = cv2.COLOR_GRAY2BGR if img.mode in ('1', 'L') else cv2.COLOR_RGB2BGR pil_as_np_array = np.array(img).astype('uint8') if img.mode == '1' else np.array(img) return cv2.cvtColor(pil_as_np_array, color_conversion) @@ -112,40 +112,45 @@ class SbbBinarizeProcessor(Processor): bin_image = cv2pil(self.binarizer.run(image=pil2cv(page_image), use_patches=True)) # update METS (add the image file): bin_image_path = self.workspace.save_image_file(bin_image, - file_id + '.IMG-BIN', - page_id=input_file.pageId, - file_grp=self.output_file_grp) - page.add_AlternativeImage(AlternativeImageType(filename=bin_image_path, comments='%s,binarized' % page_xywh['features'])) + file_id + '.IMG-BIN', + page_id=input_file.pageId, + file_grp=self.output_file_grp) + page.add_AlternativeImage( + AlternativeImageType(filename=bin_image_path, comments='%s,binarized' % page_xywh['features'])) elif oplevel == 'region': regions = page.get_AllRegions(['Text', 'Table'], depth=1) if not regions: LOG.warning("Page '%s' contains no text/table regions", page_id) for region in regions: - region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized') + region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, + feature_filter='binarized') region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), use_patches=True)) region_image_bin_path = self.workspace.save_image_file( - region_image_bin, - "%s_%s.IMG-BIN" % (file_id, region.id), - page_id=input_file.pageId, - file_grp=self.output_file_grp) + region_image_bin, + "%s_%s.IMG-BIN" % (file_id, region.id), + page_id=input_file.pageId, + file_grp=self.output_file_grp) region.add_AlternativeImage( - AlternativeImageType(filename=region_image_bin_path, comments='%s,binarized' % region_xywh['features'])) + AlternativeImageType(filename=region_image_bin_path, + comments='%s,binarized' % region_xywh['features'])) elif oplevel == 'line': region_line_tuples = [(r.id, r.get_TextLine()) for r in page.get_AllRegions(['Text'], depth=0)] if not region_line_tuples: LOG.warning("Page '%s' contains no text lines", page_id) for region_id, line in region_line_tuples: - line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') + line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, + feature_filter='binarized') line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), use_patches=True)) line_image_bin_path = self.workspace.save_image_file( - line_image_bin, - "%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id), - page_id=input_file.pageId, - file_grp=self.output_file_grp) + line_image_bin, + "%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id), + page_id=input_file.pageId, + file_grp=self.output_file_grp) line.add_AlternativeImage( - AlternativeImageType(filename=line_image_bin_path, comments='%s,binarized' % line_xywh['features'])) + AlternativeImageType(filename=line_image_bin_path, + comments='%s,binarized' % line_xywh['features'])) self.workspace.add_file( ID=file_id, diff --git a/src/eynollah/plot.py b/src/eynollah/plot.py index 11b11a5..d4f51ab 100644 --- a/src/eynollah/plot.py +++ b/src/eynollah/plot.py @@ -16,18 +16,18 @@ class EynollahPlotter: """ def __init__( - self, - *, - dir_out, - dir_of_all, - dir_save_page, - dir_of_deskewed, - dir_of_layout, - dir_of_cropped_images, - image_filename_stem, - image_org=None, - scale_x=1, - scale_y=1, + self, + *, + dir_out, + dir_of_all, + dir_save_page, + dir_of_deskewed, + dir_of_layout, + dir_of_cropped_images, + image_filename_stem, + image_org=None, + scale_x=1, + scale_y=1, ): self.dir_out = dir_out self.dir_of_all = dir_of_all @@ -45,22 +45,23 @@ class EynollahPlotter: if self.dir_of_layout is not None: values = np.unique(text_regions_p[:, :]) # pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics'] - pixels=['Background' , 'Main text' , 'Image' , 'Separator','Marginalia'] + pixels = ['Background', 'Main text', 'Image', 'Separator', 'Marginalia'] values_indexes = [0, 1, 2, 3, 4] plt.figure(figsize=(40, 40)) plt.rcParams["font.size"] = "40" im = plt.imshow(text_regions_p[:, :]) colors = [im.cmap(im.norm(value)) for value in values] - patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values] + patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], + label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in + values] plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40) plt.savefig(os.path.join(self.dir_of_layout, self.image_filename_stem + "_layout_main.png")) - def save_plot_of_layout_main_all(self, text_regions_p, image_page): if self.dir_of_all is not None: values = np.unique(text_regions_p[:, :]) # pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics'] - pixels=['Background' , 'Main text' , 'Image' , 'Separator','Marginalia'] + pixels = ['Background', 'Main text', 'Image', 'Separator', 'Marginalia'] values_indexes = [0, 1, 2, 3, 4] plt.figure(figsize=(80, 40)) plt.rcParams["font.size"] = "40" @@ -69,7 +70,9 @@ class EynollahPlotter: plt.subplot(1, 2, 2) im = plt.imshow(text_regions_p[:, :]) colors = [im.cmap(im.norm(value)) for value in values] - patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values] + patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], + label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in + values] plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60) plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + "_layout_main_and_page.png")) @@ -83,7 +86,9 @@ class EynollahPlotter: plt.rcParams["font.size"] = "40" im = plt.imshow(text_regions_p[:, :]) colors = [im.cmap(im.norm(value)) for value in values] - patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values] + patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], + label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in + values] plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40) plt.savefig(os.path.join(self.dir_of_layout, self.image_filename_stem + "_layout.png")) @@ -100,7 +105,9 @@ class EynollahPlotter: plt.subplot(1, 2, 2) im = plt.imshow(text_regions_p[:, :]) colors = [im.cmap(im.norm(value)) for value in values] - patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values] + patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], + label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in + values] plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60) plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + "_layout_and_page.png")) @@ -116,7 +123,9 @@ class EynollahPlotter: plt.subplot(1, 2, 2) im = plt.imshow(textline_mask_tot_ea[:, :]) colors = [im.cmap(im.norm(value)) for value in values] - patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values] + patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], + label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in + values] plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60) plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + "_textline_and_page.png")) @@ -132,33 +141,36 @@ class EynollahPlotter: cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + "_page.png"), image_page) if self.dir_save_page is not None: cv2.imwrite(os.path.join(self.dir_save_page, self.image_filename_stem + "_page.png"), image_page) + def save_enhanced_image(self, img_res): cv2.imwrite(os.path.join(self.dir_out, self.image_filename_stem + "_enhanced.png"), img_res) - + def save_plot_of_textline_density(self, img_patch_org): if self.dir_of_all is not None: - plt.figure(figsize=(80,40)) - plt.rcParams['font.size']='50' - plt.subplot(1,2,1) + plt.figure(figsize=(80, 40)) + plt.rcParams['font.size'] = '50' + plt.subplot(1, 2, 1) plt.imshow(img_patch_org) - plt.subplot(1,2,2) - plt.plot(gaussian_filter1d(img_patch_org.sum(axis=1), 3),np.array(range(len(gaussian_filter1d(img_patch_org.sum(axis=1), 3)))),linewidth=8) - plt.xlabel('Density of textline prediction in direction of X axis',fontsize=60) - plt.ylabel('Height',fontsize=60) - plt.yticks([0,len(gaussian_filter1d(img_patch_org.sum(axis=1), 3))]) + plt.subplot(1, 2, 2) + plt.plot(gaussian_filter1d(img_patch_org.sum(axis=1), 3), + np.array(range(len(gaussian_filter1d(img_patch_org.sum(axis=1), 3)))), linewidth=8) + plt.xlabel('Density of textline prediction in direction of X axis', fontsize=60) + plt.ylabel('Height', fontsize=60) + plt.yticks([0, len(gaussian_filter1d(img_patch_org.sum(axis=1), 3))]) plt.gca().invert_yaxis() - plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem+'_density_of_textline.png')) + plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + '_density_of_textline.png')) def save_plot_of_rotation_angle(self, angels, var_res): if self.dir_of_all is not None: - plt.figure(figsize=(60,30)) - plt.rcParams['font.size']='50' - plt.plot(angels,np.array(var_res),'-o',markersize=25,linewidth=4) - plt.xlabel('angle',fontsize=50) - plt.ylabel('variance of sum of rotated textline in direction of x axis',fontsize=50) - plt.plot(angels[np.argmax(var_res)],var_res[np.argmax(np.array(var_res))] ,'*',markersize=50,label='Angle of deskewing=' +str("{:.2f}".format(angels[np.argmax(var_res)]))+r'$\degree$') + plt.figure(figsize=(60, 30)) + plt.rcParams['font.size'] = '50' + plt.plot(angels, np.array(var_res), '-o', markersize=25, linewidth=4) + plt.xlabel('angle', fontsize=50) + plt.ylabel('variance of sum of rotated textline in direction of x axis', fontsize=50) + plt.plot(angels[np.argmax(var_res)], var_res[np.argmax(np.array(var_res))], '*', markersize=50, + label='Angle of deskewing=' + str("{:.2f}".format(angels[np.argmax(var_res)])) + r'$\degree$') plt.legend(loc='best') - plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem+'_rotation_angle.png')) + plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + '_rotation_angle.png')) def write_images_into_directory(self, img_contours, image_page): if self.dir_of_cropped_images is not None: @@ -168,9 +180,9 @@ class EynollahPlotter: box = [x, y, w, h] croped_page, page_coord = crop_image_inside_box(box, image_page) - croped_page = resize_image(croped_page, int(croped_page.shape[0] / self.scale_y), int(croped_page.shape[1] / self.scale_x)) + croped_page = resize_image(croped_page, int(croped_page.shape[0] / self.scale_y), + int(croped_page.shape[1] / self.scale_x)) path = os.path.join(self.dir_of_cropped_images, self.image_filename_stem + "_" + str(index) + ".jpg") cv2.imwrite(path, croped_page) index += 1 - diff --git a/src/eynollah/sbb_binarize.py b/src/eynollah/sbb_binarize.py index 6c9acf1..d4424cd 100644 --- a/src/eynollah/sbb_binarize.py +++ b/src/eynollah/sbb_binarize.py @@ -12,14 +12,15 @@ import os import numpy as np from PIL import Image import cv2 + environ['TF_CPP_MIN_LOG_LEVEL'] = '3' stderr = sys.stderr sys.stderr = open(devnull, 'w') import tensorflow as tf from tensorflow.keras.models import load_model from tensorflow.python.keras import backend as tensorflow_backend -sys.stderr = stderr +sys.stderr = stderr import logging @@ -36,7 +37,7 @@ class SbbBinarizer: self.start_new_session() - self.model_files = glob(self.model_dir+"/*/", recursive = True) + self.model_files = glob(self.model_dir + "/*/", recursive=True) self.models = [] for model_file in self.model_files: @@ -56,52 +57,49 @@ class SbbBinarizer: def load_model(self, model_name): model = load_model(join(self.model_dir, model_name), compile=False) - model_height = model.layers[len(model.layers)-1].output_shape[1] - model_width = model.layers[len(model.layers)-1].output_shape[2] - n_classes = model.layers[len(model.layers)-1].output_shape[3] + model_height = model.layers[len(model.layers) - 1].output_shape[1] + model_width = model.layers[len(model.layers) - 1].output_shape[2] + n_classes = model.layers[len(model.layers) - 1].output_shape[3] return model, model_height, model_width, n_classes def predict(self, model_in, img, use_patches, n_batch_inference=5): tensorflow_backend.set_session(self.session) model, model_height, model_width, n_classes = model_in - + img_org_h = img.shape[0] img_org_w = img.shape[1] - + if img.shape[0] < model_height and img.shape[1] >= model_width: - img_padded = np.zeros(( model_height, img.shape[1], img.shape[2] )) - - index_start_h = int( abs( img.shape[0] - model_height) /2.) + img_padded = np.zeros((model_height, img.shape[1], img.shape[2])) + + index_start_h = int(abs(img.shape[0] - model_height) / 2.) index_start_w = 0 - - img_padded [ index_start_h: index_start_h+img.shape[0], :, : ] = img[:,:,:] - + + img_padded[index_start_h: index_start_h + img.shape[0], :, :] = img[:, :, :] + elif img.shape[0] >= model_height and img.shape[1] < model_width: - img_padded = np.zeros(( img.shape[0], model_width, img.shape[2] )) - - index_start_h = 0 - index_start_w = int( abs( img.shape[1] - model_width) /2.) - - img_padded [ :, index_start_w: index_start_w+img.shape[1], : ] = img[:,:,:] - - + img_padded = np.zeros((img.shape[0], model_width, img.shape[2])) + + index_start_h = 0 + index_start_w = int(abs(img.shape[1] - model_width) / 2.) + + img_padded[:, index_start_w: index_start_w + img.shape[1], :] = img[:, :, :] + elif img.shape[0] < model_height and img.shape[1] < model_width: - img_padded = np.zeros(( model_height, model_width, img.shape[2] )) - - index_start_h = int( abs( img.shape[0] - model_height) /2.) - index_start_w = int( abs( img.shape[1] - model_width) /2.) - - img_padded [ index_start_h: index_start_h+img.shape[0], index_start_w: index_start_w+img.shape[1], : ] = img[:,:,:] - + img_padded = np.zeros((model_height, model_width, img.shape[2])) + + index_start_h = int(abs(img.shape[0] - model_height) / 2.) + index_start_w = int(abs(img.shape[1] - model_width) / 2.) + + img_padded[index_start_h: index_start_h + img.shape[0], index_start_w: index_start_w + img.shape[1], + :] = img[:, :, :] + else: index_start_h = 0 - index_start_w = 0 + index_start_w = 0 img_padded = np.copy(img) - - + img = np.copy(img_padded) - - if use_patches: @@ -110,7 +108,6 @@ class SbbBinarizer: width_mid = model_width - 2 * margin height_mid = model_height - 2 * margin - img = img / float(255.0) img_h = img.shape[0] @@ -130,18 +127,17 @@ class SbbBinarizer: nyf = int(nyf) + 1 else: nyf = int(nyf) - - + list_i_s = [] list_j_s = [] list_x_u = [] list_x_d = [] list_y_u = [] list_y_d = [] - + batch_indexer = 0 - - img_patch = np.zeros((n_batch_inference, model_height, model_width,3)) + + img_patch = np.zeros((n_batch_inference, model_height, model_width, 3)) for i in range(nxf): for j in range(nyf): @@ -166,146 +162,167 @@ class SbbBinarizer: if index_y_u > img_h: index_y_u = img_h index_y_d = img_h - model_height - - + list_i_s.append(i) list_j_s.append(j) list_x_u.append(index_x_u) list_x_d.append(index_x_d) list_y_d.append(index_y_d) list_y_u.append(index_y_u) - - img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - + img_patch[batch_indexer, :, :, :] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + batch_indexer = batch_indexer + 1 - - if batch_indexer == n_batch_inference: - - label_p_pred = model.predict(img_patch,verbose=0) - + + label_p_pred = model.predict(img_patch, verbose=0) + seg = np.argmax(label_p_pred, axis=3) - + #print(seg.shape, len(seg), len(list_i_s)) - + indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = seg[indexer_inside_batch,:,:] + seg_in = seg[indexer_inside_batch, :, :] seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) - + index_y_u_in = list_y_u[indexer_inside_batch] index_y_d_in = list_y_d[indexer_inside_batch] - + index_x_u_in = list_x_u[indexer_inside_batch] index_x_d_in = list_x_d[indexer_inside_batch] - + if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + seg_color = seg_color[0: seg_color.shape[0] - margin, 0: seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0: index_y_u_in - margin, + index_x_d_in + 0: index_x_u_in - margin, :] = seg_color elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - 0, margin: seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin: index_y_u_in - 0, + index_x_d_in + margin: index_x_u_in - 0, :] = seg_color elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - 0, 0: seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin: index_y_u_in - 0, + index_x_d_in + 0: index_x_u_in - margin, :] = seg_color elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + seg_color = seg_color[0: seg_color.shape[0] - margin, margin: seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0: index_y_u_in - margin, + index_x_d_in + margin: index_x_u_in - 0, :] = seg_color elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - margin, + 0: seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin: index_y_u_in - margin, + index_x_d_in + 0: index_x_u_in - margin, :] = seg_color elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - margin, + margin: seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin: index_y_u_in - margin, + index_x_d_in + margin: index_x_u_in - 0, :] = seg_color elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + seg_color = seg_color[0: seg_color.shape[0] - margin, + margin: seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0: index_y_u_in - margin, + index_x_d_in + margin: index_x_u_in - margin, :] = seg_color elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - 0, + margin: seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin: index_y_u_in - 0, + index_x_d_in + margin: index_x_u_in - margin, :] = seg_color else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 - - + seg_color = seg_color[margin: seg_color.shape[0] - margin, + margin: seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin: index_y_u_in - margin, + index_x_d_in + margin: index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch + 1 + list_i_s = [] list_j_s = [] list_x_u = [] list_x_d = [] list_y_u = [] list_y_d = [] - + batch_indexer = 0 - - img_patch = np.zeros((n_batch_inference, model_height, model_width,3)) - - elif i==(nxf-1) and j==(nyf-1): - label_p_pred = model.predict(img_patch,verbose=0) - + + img_patch = np.zeros((n_batch_inference, model_height, model_width, 3)) + + elif i == (nxf - 1) and j == (nyf - 1): + label_p_pred = model.predict(img_patch, verbose=0) + seg = np.argmax(label_p_pred, axis=3) - + #print(seg.shape, len(seg), len(list_i_s)) - + indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): - seg_in = seg[indexer_inside_batch,:,:] + seg_in = seg[indexer_inside_batch, :, :] seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) - + index_y_u_in = list_y_u[indexer_inside_batch] index_y_d_in = list_y_d[indexer_inside_batch] - + index_x_u_in = list_x_u[indexer_inside_batch] index_x_d_in = list_x_d[indexer_inside_batch] - + if i_batch == 0 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + seg_color = seg_color[0: seg_color.shape[0] - margin, 0: seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0: index_y_u_in - margin, + index_x_d_in + 0: index_x_u_in - margin, :] = seg_color elif i_batch == nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - 0, margin: seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin: index_y_u_in - 0, + index_x_d_in + margin: index_x_u_in - 0, :] = seg_color elif i_batch == 0 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - 0, 0: seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin: index_y_u_in - 0, + index_x_d_in + 0: index_x_u_in - margin, :] = seg_color elif i_batch == nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + seg_color = seg_color[0: seg_color.shape[0] - margin, margin: seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0: index_y_u_in - margin, + index_x_d_in + margin: index_x_u_in - 0, :] = seg_color elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - margin, + 0: seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin: index_y_u_in - margin, + index_x_d_in + 0: index_x_u_in - margin, :] = seg_color elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - margin, + margin: seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin: index_y_u_in - margin, + index_x_d_in + margin: index_x_u_in - 0, :] = seg_color elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + seg_color = seg_color[0: seg_color.shape[0] - margin, + margin: seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0: index_y_u_in - margin, + index_x_d_in + margin: index_x_u_in - margin, :] = seg_color elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - 0, + margin: seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin: index_y_u_in - 0, + index_x_d_in + margin: index_x_u_in - margin, :] = seg_color else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color - - indexer_inside_batch = indexer_inside_batch +1 - - + seg_color = seg_color[margin: seg_color.shape[0] - margin, + margin: seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin: index_y_u_in - margin, + index_x_d_in + margin: index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch + 1 + list_i_s = [] list_j_s = [] list_x_u = [] list_x_d = [] list_y_u = [] list_y_d = [] - + batch_indexer = 0 - - img_patch = np.zeros((n_batch_inference, model_height, model_width,3)) - - - - prediction_true = prediction_true[index_start_h: index_start_h+img_org_h, index_start_w: index_start_w+img_org_w,:] + + img_patch = np.zeros((n_batch_inference, model_height, model_width, 3)) + + prediction_true = prediction_true[index_start_h: index_start_h + img_org_h, + index_start_w: index_start_w + img_org_w, :] prediction_true = prediction_true.astype(np.uint8) else: @@ -320,13 +337,13 @@ class SbbBinarizer: seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) - return prediction_true[:,:,0] + return prediction_true[:, :, 0] def run(self, image=None, image_path=None, save=None, use_patches=False, dir_in=None, dir_out=None): - print(dir_in,'dir_in') + print(dir_in, 'dir_in') if not dir_in: if (image is not None and image_path is not None) or \ - (image is None and image_path is None): + (image is None and image_path is None): raise ValueError("Must pass either a opencv2 image or an image_path") if image_path is not None: image = cv2.imread(image_path) @@ -355,11 +372,11 @@ class SbbBinarizer: cv2.imwrite(save, img_last) return img_last else: - ls_imgs = os.listdir(dir_in) + ls_imgs = os.listdir(dir_in) for image_name in ls_imgs: image_stem = image_name.split('.')[0] - print(image_name,'image_name') - image = cv2.imread(os.path.join(dir_in,image_name) ) + print(image_name, 'image_name') + image = cv2.imread(os.path.join(dir_in, image_name)) img_last = 0 for n, (model, model_file) in enumerate(zip(self.models, self.model_files)): self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files))) @@ -381,5 +398,5 @@ class SbbBinarizer: kernel = np.ones((5, 5), np.uint8) img_last[:, :][img_last[:, :] > 0] = 255 img_last = (img_last[:, :] == 0) * 255 - - cv2.imwrite(os.path.join(dir_out,image_stem+'.png'), img_last) + + cv2.imwrite(os.path.join(dir_out, image_stem + '.png'), img_last) diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index b4eb3a6..6644cfd 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -17,79 +17,78 @@ from .contour import (contours_in_same_horizon, def return_x_start_end_mothers_childs_and_type_of_reading_order( x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff): - - x_start=[] - x_end=[] - kind=[]#if covers 2 and more than 2 columns set it to 1 otherwise 0 - len_sep=[] - y_sep=[] - y_diff=[] - new_main_sep_y=[] - - indexer=0 + x_start = [] + x_end = [] + kind = [] #if covers 2 and more than 2 columns set it to 1 otherwise 0 + len_sep = [] + y_sep = [] + y_diff = [] + new_main_sep_y = [] + + indexer = 0 for i in range(len(x_min_hor_some)): - starting=x_min_hor_some[i]-peak_points - starting=starting[starting>=0] - min_start=np.argmin(starting) - ending=peak_points-x_max_hor_some[i] - len_ending_neg=len(ending[ending<=0]) - - ending=ending[ending>0] - max_end=np.argmin(ending)+len_ending_neg - - if (max_end-min_start)>=2: - if (max_end-min_start)==(len(peak_points)-1): + starting = x_min_hor_some[i] - peak_points + starting = starting[starting >= 0] + min_start = np.argmin(starting) + ending = peak_points - x_max_hor_some[i] + len_ending_neg = len(ending[ending <= 0]) + + ending = ending[ending > 0] + max_end = np.argmin(ending) + len_ending_neg + + if (max_end - min_start) >= 2: + if (max_end - min_start) == (len(peak_points) - 1): new_main_sep_y.append(indexer) - + #print((max_end-min_start),len(peak_points),'(max_end-min_start)') y_sep.append(cy_hor_some[i]) y_diff.append(cy_hor_diff[i]) x_end.append(max_end) - - x_start.append( min_start) - - len_sep.append(max_end-min_start) - if max_end==min_start+1: + + x_start.append(min_start) + + len_sep.append(max_end - min_start) + if max_end == min_start + 1: kind.append(0) else: kind.append(1) - - indexer+=1 + + indexer += 1 x_start_returned = np.array(x_start, dtype=int) x_end_returned = np.array(x_end, dtype=int) y_sep_returned = np.array(y_sep, dtype=int) y_diff_returned = np.array(y_diff, dtype=int) - + all_args_uniq = contours_in_same_horizon(y_sep_returned) - args_to_be_unified=[] - y_unified=[] - y_diff_unified=[] - x_s_unified=[] - x_e_unified=[] - if len(all_args_uniq)>0: + args_to_be_unified = [] + y_unified = [] + y_diff_unified = [] + x_s_unified = [] + x_e_unified = [] + if len(all_args_uniq) > 0: #print('burda') if type(all_args_uniq[0]) is list: for dd in range(len(all_args_uniq)): - if len(all_args_uniq[dd])==2: - x_s_same_hor=np.array(x_start_returned)[all_args_uniq[dd]] - x_e_same_hor=np.array(x_end_returned)[all_args_uniq[dd]] - y_sep_same_hor=np.array(y_sep_returned)[all_args_uniq[dd]] - y_diff_same_hor=np.array(y_diff_returned)[all_args_uniq[dd]] + if len(all_args_uniq[dd]) == 2: + x_s_same_hor = np.array(x_start_returned)[all_args_uniq[dd]] + x_e_same_hor = np.array(x_end_returned)[all_args_uniq[dd]] + y_sep_same_hor = np.array(y_sep_returned)[all_args_uniq[dd]] + y_diff_same_hor = np.array(y_diff_returned)[all_args_uniq[dd]] #print('burda2') - if (x_s_same_hor[0]==x_e_same_hor[1]-1 or - x_s_same_hor[1]==x_e_same_hor[0]-1 and - x_s_same_hor[0]!=x_s_same_hor[1] and - x_e_same_hor[0]!=x_e_same_hor[1]): + if (x_s_same_hor[0] == x_e_same_hor[1] - 1 or + x_s_same_hor[1] == x_e_same_hor[0] - 1 and + x_s_same_hor[0] != x_s_same_hor[1] and + x_e_same_hor[0] != x_e_same_hor[1]): #print('burda3') for arg_in in all_args_uniq[dd]: #print(arg_in,'arg_in') args_to_be_unified.append(arg_in) - y_selected=np.min(y_sep_same_hor) - y_diff_selected=np.max(y_diff_same_hor) - x_s_selected=np.min(x_s_same_hor) - x_e_selected=np.max(x_e_same_hor) - + y_selected = np.min(y_sep_same_hor) + y_diff_selected = np.max(y_diff_same_hor) + x_s_selected = np.min(x_s_same_hor) + x_e_selected = np.max(x_e_same_hor) + x_s_unified.append(x_s_selected) x_e_unified.append(x_e_selected) y_unified.append(y_selected) @@ -101,129 +100,129 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order( #print(x_e_unified,'x_s_selected') #print(y_unified,'x_e_same_hor') - args_lines_not_unified=list( set(range(len(y_sep_returned)))-set(args_to_be_unified) ) + args_lines_not_unified = list(set(range(len(y_sep_returned))) - set(args_to_be_unified)) #print(args_lines_not_unified,'args_lines_not_unified') - - x_start_returned_not_unified=list( np.array(x_start_returned)[args_lines_not_unified] ) - x_end_returned_not_unified=list( np.array(x_end_returned)[args_lines_not_unified] ) - y_sep_returned_not_unified=list (np.array(y_sep_returned)[args_lines_not_unified] ) - y_diff_returned_not_unified=list (np.array(y_diff_returned)[args_lines_not_unified] ) - + + x_start_returned_not_unified = list(np.array(x_start_returned)[args_lines_not_unified]) + x_end_returned_not_unified = list(np.array(x_end_returned)[args_lines_not_unified]) + y_sep_returned_not_unified = list(np.array(y_sep_returned)[args_lines_not_unified]) + y_diff_returned_not_unified = list(np.array(y_diff_returned)[args_lines_not_unified]) + for dv in range(len(y_unified)): y_sep_returned_not_unified.append(y_unified[dv]) y_diff_returned_not_unified.append(y_diff_unified[dv]) x_start_returned_not_unified.append(x_s_unified[dv]) x_end_returned_not_unified.append(x_e_unified[dv]) - + #print(y_sep_returned,'y_sep_returned') #print(x_start_returned,'x_start_returned') #print(x_end_returned,'x_end_returned') - + x_start_returned = np.array(x_start_returned_not_unified, dtype=int) x_end_returned = np.array(x_end_returned_not_unified, dtype=int) y_sep_returned = np.array(y_sep_returned_not_unified, dtype=int) y_diff_returned = np.array(y_diff_returned_not_unified, dtype=int) - + #print(y_sep_returned,'y_sep_returned2') #print(x_start_returned,'x_start_returned2') #print(x_end_returned,'x_end_returned2') #print(new_main_sep_y,'new_main_sep_y') - + #print(x_start,'x_start') #print(x_end,'x_end') - if len(new_main_sep_y)>0: - - min_ys=np.min(y_sep) - max_ys=np.max(y_sep) - - y_mains= [min_ys] - y_mains_sep_ohne_grenzen=[] - + if len(new_main_sep_y) > 0: + + min_ys = np.min(y_sep) + max_ys = np.max(y_sep) + + y_mains = [min_ys] + y_mains_sep_ohne_grenzen = [] + for ii in range(len(new_main_sep_y)): y_mains.append(y_sep[new_main_sep_y[ii]]) y_mains_sep_ohne_grenzen.append(y_sep[new_main_sep_y[ii]]) - + y_mains.append(max_ys) - - y_mains_sorted=np.sort(y_mains) - diff=np.diff(y_mains_sorted) - argm=np.argmax(diff) - - y_min_new=y_mains_sorted[argm] - y_max_new=y_mains_sorted[argm+1] - + + y_mains_sorted = np.sort(y_mains) + diff = np.diff(y_mains_sorted) + argm = np.argmax(diff) + + y_min_new = y_mains_sorted[argm] + y_max_new = y_mains_sorted[argm + 1] + #print(y_min_new,'y_min_new') #print(y_max_new,'y_max_new') #print(y_sep[new_main_sep_y[0]],y_sep,'yseps') - x_start=np.array(x_start) - x_end=np.array(x_end) - kind=np.array(kind) - y_sep=np.array(y_sep) + x_start = np.array(x_start) + x_end = np.array(x_end) + kind = np.array(kind) + y_sep = np.array(y_sep) if (y_min_new in y_mains_sep_ohne_grenzen and - y_max_new in y_mains_sep_ohne_grenzen): - x_start=x_start[(y_sep>y_min_new) & (y_sepy_min_new) & (y_sepy_min_new) & (y_sepy_min_new) & (y_sep y_min_new) & (y_sep < y_max_new)] + x_end = x_end[(y_sep > y_min_new) & (y_sep < y_max_new)] + kind = kind[(y_sep > y_min_new) & (y_sep < y_max_new)] + y_sep = y_sep[(y_sep > y_min_new) & (y_sep < y_max_new)] elif (y_min_new in y_mains_sep_ohne_grenzen and y_max_new not in y_mains_sep_ohne_grenzen): #print('burda') - x_start=x_start[(y_sep>y_min_new) & (y_sep<=y_max_new)] + x_start = x_start[(y_sep > y_min_new) & (y_sep <= y_max_new)] #print('burda1') - x_end=x_end[(y_sep>y_min_new) & (y_sep<=y_max_new)] + x_end = x_end[(y_sep > y_min_new) & (y_sep <= y_max_new)] #print('burda2') - kind=kind[(y_sep>y_min_new) & (y_sep<=y_max_new)] - y_sep=y_sep[(y_sep>y_min_new) & (y_sep<=y_max_new)] + kind = kind[(y_sep > y_min_new) & (y_sep <= y_max_new)] + y_sep = y_sep[(y_sep > y_min_new) & (y_sep <= y_max_new)] elif (y_min_new not in y_mains_sep_ohne_grenzen and y_max_new in y_mains_sep_ohne_grenzen): - x_start=x_start[(y_sep>=y_min_new) & (y_sep=y_min_new) & (y_sep=y_min_new) & (y_sep=y_min_new) & (y_sep= y_min_new) & (y_sep < y_max_new)] + x_end = x_end[(y_sep >= y_min_new) & (y_sep < y_max_new)] + kind = kind[(y_sep >= y_min_new) & (y_sep < y_max_new)] + y_sep = y_sep[(y_sep >= y_min_new) & (y_sep < y_max_new)] else: - x_start=x_start[(y_sep>=y_min_new) & (y_sep<=y_max_new)] - x_end=x_end[(y_sep>=y_min_new) & (y_sep<=y_max_new)] - kind=kind[(y_sep>=y_min_new) & (y_sep<=y_max_new)] - y_sep=y_sep[(y_sep>=y_min_new) & (y_sep<=y_max_new)] + x_start = x_start[(y_sep >= y_min_new) & (y_sep <= y_max_new)] + x_end = x_end[(y_sep >= y_min_new) & (y_sep <= y_max_new)] + kind = kind[(y_sep >= y_min_new) & (y_sep <= y_max_new)] + y_sep = y_sep[(y_sep >= y_min_new) & (y_sep <= y_max_new)] #print(x_start,'x_start') #print(x_end,'x_end') #print(len_sep) - - deleted=[] - for i in range(len(x_start)-1): - nodes_i=set(range(x_start[i],x_end[i]+1)) - for j in range(i+1,len(x_start)): - if nodes_i==set(range(x_start[j],x_end[j]+1)): - deleted.append(j) + + deleted = [] + for i in range(len(x_start) - 1): + nodes_i = set(range(x_start[i], x_end[i] + 1)) + for j in range(i + 1, len(x_start)): + if nodes_i == set(range(x_start[j], x_end[j] + 1)): + deleted.append(j) #print(np.unique(deleted)) - - remained_sep_indexes=set(range(len(x_start)))-set(np.unique(deleted) ) + + remained_sep_indexes = set(range(len(x_start))) - set(np.unique(deleted)) #print(remained_sep_indexes,'remained_sep_indexes') - mother=[]#if it has mother - child=[] + mother = [] #if it has mother + child = [] for index_i in remained_sep_indexes: - have_mother=0 - have_child=0 - nodes_ind=set(range(x_start[index_i],x_end[index_i]+1)) + have_mother = 0 + have_child = 0 + nodes_ind = set(range(x_start[index_i], x_end[index_i] + 1)) for index_j in remained_sep_indexes: - nodes_ind_j=set(range(x_start[index_j],x_end[index_j]+1)) - if nodes_indnodes_ind_j: - have_child=1 + nodes_ind_j = set(range(x_start[index_j], x_end[index_j] + 1)) + if nodes_ind < nodes_ind_j: + have_mother = 1 + if nodes_ind > nodes_ind_j: + have_child = 1 mother.append(have_mother) child.append(have_child) - + #print(mother,'mother') #print(len(remained_sep_indexes)) #print(len(remained_sep_indexes),len(x_start),len(x_end),len(y_sep),'lens') - y_lines_without_mother=[] - x_start_without_mother=[] - x_end_without_mother=[] - - y_lines_with_child_without_mother=[] - x_start_with_child_without_mother=[] - x_end_with_child_without_mother=[] + y_lines_without_mother = [] + x_start_without_mother = [] + x_end_without_mother = [] + + y_lines_with_child_without_mother = [] + x_start_with_child_without_mother = [] + x_end_with_child_without_mother = [] mother = np.array(mother) child = np.array(child) @@ -233,37 +232,38 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order( x_start = np.array(x_start) x_end = np.array(x_end) y_sep = np.array(y_sep) - - if len(remained_sep_indexes)>1: + + if len(remained_sep_indexes) > 1: #print(np.array(remained_sep_indexes),'np.array(remained_sep_indexes)') #print(np.array(mother),'mother') ##remained_sep_indexes_without_mother = remained_sep_indexes[mother==0] ##remained_sep_indexes_with_child_without_mother = remained_sep_indexes[mother==0 & child==1] - remained_sep_indexes_without_mother=np.array(list(remained_sep_indexes))[np.array(mother)==0] - remained_sep_indexes_with_child_without_mother=np.array(list(remained_sep_indexes))[(np.array(mother)==0) & (np.array(child)==1)] + remained_sep_indexes_without_mother = np.array(list(remained_sep_indexes))[np.array(mother) == 0] + remained_sep_indexes_with_child_without_mother = np.array(list(remained_sep_indexes))[ + (np.array(mother) == 0) & (np.array(child) == 1)] #print(remained_sep_indexes_without_mother,'remained_sep_indexes_without_mother') #print(remained_sep_indexes_without_mother,'remained_sep_indexes_without_mother') - + x_end_with_child_without_mother = x_end[remained_sep_indexes_with_child_without_mother] x_start_with_child_without_mother = x_start[remained_sep_indexes_with_child_without_mother] y_lines_with_child_without_mother = y_sep[remained_sep_indexes_with_child_without_mother] - reading_orther_type=0 + reading_orther_type = 0 x_end_without_mother = x_end[remained_sep_indexes_without_mother] x_start_without_mother = x_start[remained_sep_indexes_without_mother] y_lines_without_mother = y_sep[remained_sep_indexes_without_mother] - - if len(remained_sep_indexes_without_mother)>=2: - for i in range(len(remained_sep_indexes_without_mother)-1): - nodes_i=set(range(x_start[remained_sep_indexes_without_mother[i]], - x_end[remained_sep_indexes_without_mother[i]] - # + 1 - )) - for j in range(i+1,len(remained_sep_indexes_without_mother)): - nodes_j=set(range(x_start[remained_sep_indexes_without_mother[j]], - x_end[remained_sep_indexes_without_mother[j]] - # + 1 - )) + + if len(remained_sep_indexes_without_mother) >= 2: + for i in range(len(remained_sep_indexes_without_mother) - 1): + nodes_i = set(range(x_start[remained_sep_indexes_without_mother[i]], + x_end[remained_sep_indexes_without_mother[i]] + # + 1 + )) + for j in range(i + 1, len(remained_sep_indexes_without_mother)): + nodes_j = set(range(x_start[remained_sep_indexes_without_mother[j]], + x_end[remained_sep_indexes_without_mother[j]] + # + 1 + )) set_diff = nodes_i - nodes_j if set_diff != nodes_i: reading_orther_type = 1 @@ -273,16 +273,16 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order( #print(y_lines_with_child_without_mother,'y_lines_with_child_without_mother') #print(x_start_with_child_without_mother,'x_start_with_child_without_mother') #print(x_end_with_child_without_mother,'x_end_with_hild_without_mother') - - len_sep_with_child = len(child[child==1]) - + + len_sep_with_child = len(child[child == 1]) + #print(len_sep_with_child,'len_sep_with_child') there_is_sep_with_child = 0 if len_sep_with_child >= 1: there_is_sep_with_child = 1 #print(all_args_uniq,'all_args_uniq') #print(args_to_be_unified,'args_to_be_unified') - + return (reading_orther_type, x_start_returned, x_end_returned, @@ -297,10 +297,12 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order( x_end_with_child_without_mother, new_main_sep_y) + def crop_image_inside_box(box, img_org_copy): - image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]] + image_box = img_org_copy[box[1]: box[1] + box[3], box[0]: box[0] + box[2]] return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]] + def otsu_copy_binary(img): img_r = np.zeros((img.shape[0], img.shape[1], 3)) img1 = img[:, :, 0] @@ -313,6 +315,7 @@ def otsu_copy_binary(img): img_r = img_r / float(np.max(img_r)) * 255 return img_r + def find_features_of_lines(contours_main): areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))] @@ -347,6 +350,7 @@ def find_features_of_lines(contours_main): y_max_main, np.array(cx_main)) + def boosting_headers_by_longshot_region_segmentation(textregion_pre_p, textregion_pre_np, img_only_text): textregion_pre_p_org = np.copy(textregion_pre_p) # 4 is drop capitals @@ -366,27 +370,29 @@ def boosting_headers_by_longshot_region_segmentation(textregion_pre_p, textregio (textregion_pre_p[:, :, 0] != 2)] = 1 return textregion_pre_p + def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8): regions_without_separators_0 = regions_without_separators.sum(axis=1) z = gaussian_filter1d(regions_without_separators_0, sigma_) return np.std(z) + def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8): regions_without_separators_0 = regions_without_separators.sum(axis=0) ##plt.plot(regions_without_separators_0) ##plt.show() sigma_ = 35 # 70#35 - meda_n_updown = regions_without_separators_0[len(regions_without_separators_0) :: -1] + meda_n_updown = regions_without_separators_0[len(regions_without_separators_0):: -1] first_nonzero = next((i for i, x in enumerate(regions_without_separators_0) if x), 0) last_nonzero = next((i for i, x in enumerate(meda_n_updown) if x), 0) last_nonzero = len(regions_without_separators_0) - last_nonzero y = regions_without_separators_0 # [first_nonzero:last_nonzero] y_help = np.zeros(len(y) + 20) - y_help[10 : len(y) + 10] = y + y_help[10: len(y) + 10] = y x = np.arange(len(y)) zneg_rev = -y_help + np.max(y_help) zneg = np.zeros(len(zneg_rev) + 20) - zneg[10 : len(zneg_rev) + 10] = zneg_rev + zneg[10: len(zneg_rev) + 10] = zneg_rev z = gaussian_filter1d(y, sigma_) zneg = gaussian_filter1d(zneg, sigma_) @@ -431,16 +437,16 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl interest_neg_fin = interest_neg[(interest_neg < grenze)] peaks_neg_fin = peaks_neg[(interest_neg < grenze)] # interest_neg_fin=interest_neg[(interest_neg= 3: - index_sort_interest_neg_fin= np.argsort(interest_neg_fin) + if (num_col_classifier - ((len(interest_neg_fin)) + 1)) >= 3: + index_sort_interest_neg_fin = np.argsort(interest_neg_fin) peaks_neg_sorted = np.array(peaks_neg)[index_sort_interest_neg_fin] interest_neg_fin_sorted = np.array(interest_neg_fin)[index_sort_interest_neg_fin] - - if len(index_sort_interest_neg_fin)>=num_col_classifier: - peaks_neg_fin = list( peaks_neg_sorted[:num_col_classifier] ) - interest_neg_fin = list( interest_neg_fin_sorted[:num_col_classifier] ) + + if len(index_sort_interest_neg_fin) >= num_col_classifier: + peaks_neg_fin = list(peaks_neg_sorted[:num_col_classifier]) + interest_neg_fin = list(interest_neg_fin_sorted[:num_col_classifier]) else: peaks_neg_fin = peaks_neg[:] interest_neg_fin = interest_neg[:] @@ -458,18 +464,18 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl if num_col == 3: if ((peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or - (peaks_neg_fin[0] < p_g_l and - peaks_neg_fin[1] < p_g_l) or - (peaks_neg_fin[0] + 200 < p_m and - peaks_neg_fin[1] < p_m) or - (peaks_neg_fin[0] - 200 > p_m and - peaks_neg_fin[1] > p_m)): + (peaks_neg_fin[0] < p_g_l and + peaks_neg_fin[1] < p_g_l) or + (peaks_neg_fin[0] + 200 < p_m and + peaks_neg_fin[1] < p_m) or + (peaks_neg_fin[0] - 200 > p_m and + peaks_neg_fin[1] > p_m)): num_col = 1 peaks_neg_fin = [] if num_col == 2: if (peaks_neg_fin[0] > p_g_u or - peaks_neg_fin[0] < p_g_l): + peaks_neg_fin[0] < p_g_l): num_col = 1 peaks_neg_fin = [] @@ -512,12 +518,12 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl if num_col == 3: if ((peaks_neg_true[0] > p_g_u and peaks_neg_true[1] > p_g_u) or - (peaks_neg_true[0] < p_g_l and - peaks_neg_true[1] < p_g_l) or - (peaks_neg_true[0] < p_m and - peaks_neg_true[1] + 200 < p_m) or - (peaks_neg_true[0] - 200 > p_m and - peaks_neg_true[1] > p_m)): + (peaks_neg_true[0] < p_g_l and + peaks_neg_true[1] < p_g_l) or + (peaks_neg_true[0] < p_m and + peaks_neg_true[1] + 200 < p_m) or + (peaks_neg_true[0] - 200 > p_m and + peaks_neg_true[1] > p_m)): num_col = 1 peaks_neg_true = [] elif (p_g_u > peaks_neg_true[0] > p_g_l and @@ -529,7 +535,7 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl if num_col == 2: if (peaks_neg_true[0] > p_g_u or - peaks_neg_true[0] < p_g_l): + peaks_neg_true[0] < p_g_l): num_col = 1 peaks_neg_true = [] @@ -569,7 +575,7 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): ##plt.show() sigma_ = 15 - meda_n_updown = regions_without_separators_0[len(regions_without_separators_0) :: -1] + meda_n_updown = regions_without_separators_0[len(regions_without_separators_0):: -1] first_nonzero = next((i for i, x in enumerate(regions_without_separators_0) if x), 0) last_nonzero = next((i for i, x in enumerate(meda_n_updown) if x), 0) @@ -578,12 +584,12 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): y = regions_without_separators_0 # [first_nonzero:last_nonzero] y_help = np.zeros(len(y) + 20) - y_help[10 : len(y) + 10] = y + y_help[10: len(y) + 10] = y x = np.arange(len(y)) zneg_rev = -y_help + np.max(y_help) zneg = np.zeros(len(zneg_rev) + 20) - zneg[10 : len(zneg_rev) + 10] = zneg_rev + zneg[10: len(zneg_rev) + 10] = zneg_rev z = gaussian_filter1d(y, sigma_) zneg = gaussian_filter1d(zneg, sigma_) @@ -626,19 +632,19 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): if num_col == 3: if ((peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or - (peaks_neg_fin[0] < p_g_l and - peaks_neg_fin[1] < p_g_l) or - (peaks_neg_fin[0] < p_m and - peaks_neg_fin[1] < p_m) or - (peaks_neg_fin[0] > p_m and - peaks_neg_fin[1] > p_m)): + (peaks_neg_fin[0] < p_g_l and + peaks_neg_fin[1] < p_g_l) or + (peaks_neg_fin[0] < p_m and + peaks_neg_fin[1] < p_m) or + (peaks_neg_fin[0] > p_m and + peaks_neg_fin[1] > p_m)): num_col = 1 else: pass if num_col == 2: if (peaks_neg_fin[0] > p_g_u or - peaks_neg_fin[0] < p_g_l): + peaks_neg_fin[0] < p_g_l): num_col = 1 else: pass @@ -678,12 +684,12 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): if num_col == 3: if ((peaks_neg_true[0] > p_g_u and peaks_neg_true[1] > p_g_u) or - (peaks_neg_true[0] < p_g_l and - peaks_neg_true[1] < p_g_l) or - (peaks_neg_true[0] < p_m and - peaks_neg_true[1] < p_m) or - (peaks_neg_true[0] > p_m and - peaks_neg_true[1] > p_m)): + (peaks_neg_true[0] < p_g_l and + peaks_neg_true[1] < p_g_l) or + (peaks_neg_true[0] < p_m and + peaks_neg_true[1] < p_m) or + (peaks_neg_true[0] > p_m and + peaks_neg_true[1] > p_m)): num_col = 1 peaks_neg_true = [] elif (p_g_u > peaks_neg_true[0] > p_g_l and @@ -697,13 +703,13 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): if num_col == 2: if (peaks_neg_true[0] > p_g_u or - peaks_neg_true[0] < p_g_l): + peaks_neg_true[0] < p_g_l): num_col = 1 peaks_neg_true = [] if num_col == 4: if (len(np.array(peaks_neg_true)[np.array(peaks_neg_true) < p_g_l]) == 2 or - len(np.array(peaks_neg_true)[np.array(peaks_neg_true) > (len(y) - p_g_l)]) == 2): + len(np.array(peaks_neg_true)[np.array(peaks_neg_true) > (len(y) - p_g_l)]) == 2): num_col = 1 peaks_neg_true = [] else: @@ -808,7 +814,7 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1): # boxes.append([int(x), int(y), int(w), int(h)]) map_of_drop_contour_bb = np.zeros((layout1.shape[0], layout1.shape[1])) - map_of_drop_contour_bb[y : y + h, x : x + w] = layout1[y : y + h, x : x + w] + map_of_drop_contour_bb[y: y + h, x: x + w] = layout1[y: y + h, x: x + w] if (100. * (map_of_drop_contour_bb == 1).sum() / (map_of_drop_contour_bb == 5).sum()) >= 15: @@ -843,19 +849,19 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop box0 = box + (0,) mask_of_drop_cpaital_in_early_layout = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1])) mask_of_drop_cpaital_in_early_layout[box] = text_regions_p[box] - - all_drop_capital_pixels_which_is_text_in_early_lo = np.sum(mask_of_drop_cpaital_in_early_layout[box]==1) + + all_drop_capital_pixels_which_is_text_in_early_lo = np.sum(mask_of_drop_cpaital_in_early_layout[box] == 1) mask_of_drop_cpaital_in_early_layout[box] = 1 - all_drop_capital_pixels = np.sum(mask_of_drop_cpaital_in_early_layout==1) - + all_drop_capital_pixels = np.sum(mask_of_drop_cpaital_in_early_layout == 1) + percent_text_to_all_in_drop = all_drop_capital_pixels_which_is_text_in_early_lo / float(all_drop_capital_pixels) if (areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) / float(w * h) > 0.6 and - percent_text_to_all_in_drop >= 0.3): + percent_text_to_all_in_drop >= 0.3): layout_in_patch[box0] = drop_capital_label else: layout_in_patch[box0][layout_in_patch[box0] == drop_capital_label] = drop_capital_label layout_in_patch[box0][layout_in_patch[box0] == 0] = drop_capital_label - layout_in_patch[box0][layout_in_patch[box0] == 4] = drop_capital_label# images + layout_in_patch[box0][layout_in_patch[box0] == 4] = drop_capital_label # images #layout_in_patch[box0][layout_in_patch[box0] == drop_capital_label] = 1#drop_capital_label return layout_in_patch @@ -867,39 +873,38 @@ def check_any_text_region_in_model_one_is_main_or_header( all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered): - cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \ find_new_features_of_contours(contours_only_text_parent) - length_con=x_max_main-x_min_main - height_con=y_max_main-y_min_main + length_con = x_max_main - x_min_main + height_con = y_max_main - y_min_main - all_found_textline_polygons_main=[] - all_found_textline_polygons_head=[] + all_found_textline_polygons_main = [] + all_found_textline_polygons_head = [] - all_box_coord_main=[] - all_box_coord_head=[] + all_box_coord_main = [] + all_box_coord_head = [] - slopes_main=[] - slopes_head=[] + slopes_main = [] + slopes_head = [] - contours_only_text_parent_main=[] - contours_only_text_parent_head=[] + contours_only_text_parent_main = [] + contours_only_text_parent_head = [] - contours_only_text_parent_main_d=[] - contours_only_text_parent_head_d=[] + contours_only_text_parent_main_d = [] + contours_only_text_parent_head_d = [] for ii in range(len(contours_only_text_parent)): - con=contours_only_text_parent[ii] - img=np.zeros((regions_model_1.shape[0],regions_model_1.shape[1],3)) + con = contours_only_text_parent[ii] + img = np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3)) img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) - all_pixels=((img[:,:,0]==255)*1).sum() - pixels_header=( ( (img[:,:,0]==255) & (regions_model_full[:,:,0]==2) )*1 ).sum() - pixels_main=all_pixels-pixels_header + all_pixels = ((img[:, :, 0] == 255) * 1).sum() + pixels_header = (((img[:, :, 0] == 255) & (regions_model_full[:, :, 0] == 2)) * 1).sum() + pixels_main = all_pixels - pixels_header - if (pixels_header>=pixels_main) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ): - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 + if (pixels_header >= pixels_main) and ((length_con[ii] / float(height_con[ii])) >= 1.3): + regions_model_1[:, :][(regions_model_1[:, :] == 1) & (img[:, :, 0] == 255)] = 2 contours_only_text_parent_head.append(con) if contours_only_text_parent_d_ordered is not None: contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii]) @@ -907,7 +912,7 @@ def check_any_text_region_in_model_one_is_main_or_header( slopes_head.append(slopes[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) else: - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1 + regions_model_1[:, :][(regions_model_1[:, :] == 1) & (img[:, :, 0] == 255)] = 1 contours_only_text_parent_main.append(con) if contours_only_text_parent_d_ordered is not None: contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii]) @@ -929,13 +934,13 @@ def check_any_text_region_in_model_one_is_main_or_header( contours_only_text_parent_main_d, contours_only_text_parent_head_d) + def check_any_text_region_in_model_one_is_main_or_header_light( regions_model_1, regions_model_full, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered): - ### to make it faster h_o = regions_model_1.shape[0] w_o = regions_model_1.shape[1] @@ -946,42 +951,42 @@ def check_any_text_region_in_model_one_is_main_or_header_light( regions_model_full = cv2.resize(regions_model_full, (regions_model_full.shape[1] // zoom, regions_model_full.shape[0] // zoom), interpolation=cv2.INTER_NEAREST) - contours_only_text_parent = [(i / zoom).astype(int) for i in contours_only_text_parent] + contours_only_text_parent = [(i / zoom).astype(int) for i in contours_only_text_parent] ### cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \ find_new_features_of_contours(contours_only_text_parent) - length_con=x_max_main-x_min_main - height_con=y_max_main-y_min_main + length_con = x_max_main - x_min_main + height_con = y_max_main - y_min_main - all_found_textline_polygons_main=[] - all_found_textline_polygons_head=[] + all_found_textline_polygons_main = [] + all_found_textline_polygons_head = [] - all_box_coord_main=[] - all_box_coord_head=[] + all_box_coord_main = [] + all_box_coord_head = [] - slopes_main=[] - slopes_head=[] + slopes_main = [] + slopes_head = [] - contours_only_text_parent_main=[] - contours_only_text_parent_head=[] + contours_only_text_parent_main = [] + contours_only_text_parent_head = [] - contours_only_text_parent_main_d=[] - contours_only_text_parent_head_d=[] + contours_only_text_parent_main_d = [] + contours_only_text_parent_head_d = [] for ii in range(len(contours_only_text_parent)): - con=contours_only_text_parent[ii] - img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3)) + con = contours_only_text_parent[ii] + img = np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3)) img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) - all_pixels = (img[:,:,0]==255).sum() - pixels_header=((img[:,:,0]==255) & - (regions_model_full[:,:,0]==2)).sum() + all_pixels = (img[:, :, 0] == 255).sum() + pixels_header = ((img[:, :, 0] == 255) & + (regions_model_full[:, :, 0] == 2)).sum() pixels_main = all_pixels - pixels_header - if (pixels_header>=pixels_main) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ): - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 + if (pixels_header >= pixels_main) and ((length_con[ii] / float(height_con[ii])) >= 1.3): + regions_model_1[:, :][(regions_model_1[:, :] == 1) & (img[:, :, 0] == 255)] = 2 contours_only_text_parent_head.append(con) if contours_only_text_parent_d_ordered is not None: contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii]) @@ -989,7 +994,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light( slopes_head.append(slopes[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) else: - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1 + regions_model_1[:, :][(regions_model_1[:, :] == 1) & (img[:, :, 0] == 255)] = 1 contours_only_text_parent_main.append(con) if contours_only_text_parent_d_ordered is not None: contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii]) @@ -1007,7 +1012,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light( contours_only_text_parent_head = [(i * zoom).astype(int) for i in contours_only_text_parent_head] contours_only_text_parent_main = [(i * zoom).astype(int) for i in contours_only_text_parent_main] ### - + return (regions_model_1, contours_only_text_parent_main, contours_only_text_parent_head, @@ -1145,6 +1150,7 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) textlines_con_changed.append(textlines_big_org_form) return textlines_con_changed + def order_of_regions(textline_mask, contours_main, contours_header, y_ref): ##plt.imshow(textline_mask) ##plt.show() @@ -1177,7 +1183,7 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): y = textline_sum_along_width[:] y_padded = np.zeros(len(y) + 40) - y_padded[20 : len(y) + 20] = y + y_padded[20: len(y) + 20] = y x = np.arange(len(y)) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) @@ -1186,7 +1192,7 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): z = gaussian_filter1d(y_padded, sigma_gaus) zneg_rev = -y_padded + np.max(y_padded) zneg = np.zeros(len(zneg_rev) + 40) - zneg[20 : len(zneg_rev) + 20] = zneg_rev + zneg[20: len(zneg_rev) + 20] = zneg_rev zneg = gaussian_filter1d(zneg, sigma_gaus) peaks, _ = find_peaks(z, height=0) @@ -1237,13 +1243,13 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): matrix_of_orders = np.zeros((len(contours_main) + len(contours_header), 5)) matrix_of_orders[:, 0] = np.arange(len(contours_main) + len(contours_header)) matrix_of_orders[: len(contours_main), 1] = 1 - matrix_of_orders[len(contours_main) :, 1] = 2 + matrix_of_orders[len(contours_main):, 1] = 2 matrix_of_orders[: len(contours_main), 2] = cx_main - matrix_of_orders[len(contours_main) :, 2] = cx_header + matrix_of_orders[len(contours_main):, 2] = cx_header matrix_of_orders[: len(contours_main), 3] = cy_main - matrix_of_orders[len(contours_main) :, 3] = cy_header + matrix_of_orders[len(contours_main):, 3] = cy_header matrix_of_orders[: len(contours_main), 4] = np.arange(len(contours_main)) - matrix_of_orders[len(contours_main) :, 4] = np.arange(len(contours_header)) + matrix_of_orders[len(contours_main):, 4] = np.arange(len(contours_header)) # print(peaks_neg_new,'peaks_neg_new') # print(matrix_of_orders,'matrix_of_orders') @@ -1290,72 +1296,73 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new( - img_p_in_ver, img_in_hor,num_col_classifier): - + img_p_in_ver, img_in_hor, num_col_classifier): #img_p_in_ver = cv2.erode(img_p_in_ver, self.kernel, iterations=2) - img_p_in_ver=img_p_in_ver.astype(np.uint8) - img_p_in_ver=np.repeat(img_p_in_ver[:, :, np.newaxis], 3, axis=2) + img_p_in_ver = img_p_in_ver.astype(np.uint8) + img_p_in_ver = np.repeat(img_p_in_ver[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(img_p_in_ver, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_lines_ver,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + contours_lines_ver, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) slope_lines_ver, _, x_min_main_ver, _, _, _, y_min_main_ver, y_max_main_ver, cx_main_ver = \ find_features_of_lines(contours_lines_ver) for i in range(len(x_min_main_ver)): img_p_in_ver[int(y_min_main_ver[i]): - int(y_min_main_ver[i])+30, - int(cx_main_ver[i])-25: - int(cx_main_ver[i])+25, 0] = 0 - img_p_in_ver[int(y_max_main_ver[i])-30: + int(y_min_main_ver[i]) + 30, + int(cx_main_ver[i]) - 25: + int(cx_main_ver[i]) + 25, 0] = 0 + img_p_in_ver[int(y_max_main_ver[i]) - 30: int(y_max_main_ver[i]), - int(cx_main_ver[i])-25: - int(cx_main_ver[i])+25, 0] = 0 + int(cx_main_ver[i]) - 25: + int(cx_main_ver[i]) + 25, 0] = 0 - img_in_hor=img_in_hor.astype(np.uint8) - img_in_hor=np.repeat(img_in_hor[:, :, np.newaxis], 3, axis=2) + img_in_hor = img_in_hor.astype(np.uint8) + img_in_hor = np.repeat(img_in_hor[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(img_in_hor, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_lines_hor,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - + contours_lines_hor, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + slope_lines_hor, dist_x_hor, x_min_main_hor, x_max_main_hor, cy_main_hor, _, _, _, _ = \ find_features_of_lines(contours_lines_hor) - x_width_smaller_than_acolumn_width=img_in_hor.shape[1]/float(num_col_classifier+1.) - - len_lines_bigger_than_x_width_smaller_than_acolumn_width=len( dist_x_hor[dist_x_hor>=x_width_smaller_than_acolumn_width] ) - len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column=int(len_lines_bigger_than_x_width_smaller_than_acolumn_width / - float(num_col_classifier)) + x_width_smaller_than_acolumn_width = img_in_hor.shape[1] / float(num_col_classifier + 1.) + + len_lines_bigger_than_x_width_smaller_than_acolumn_width = len( + dist_x_hor[dist_x_hor >= x_width_smaller_than_acolumn_width]) + len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column = int( + len_lines_bigger_than_x_width_smaller_than_acolumn_width / + float(num_col_classifier)) if len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column < 10: - args_hor=np.arange(len(slope_lines_hor)) - all_args_uniq=contours_in_same_horizon(cy_main_hor) + args_hor = np.arange(len(slope_lines_hor)) + all_args_uniq = contours_in_same_horizon(cy_main_hor) #print(all_args_uniq,'all_args_uniq') - if len(all_args_uniq)>0: + if len(all_args_uniq) > 0: if type(all_args_uniq[0]) is list: - special_separators=[] - contours_new=[] + special_separators = [] + contours_new = [] for dd in range(len(all_args_uniq)): - merged_all=None - some_args=args_hor[all_args_uniq[dd]] - some_cy=cy_main_hor[all_args_uniq[dd]] - some_x_min=x_min_main_hor[all_args_uniq[dd]] - some_x_max=x_max_main_hor[all_args_uniq[dd]] - + merged_all = None + some_args = args_hor[all_args_uniq[dd]] + some_cy = cy_main_hor[all_args_uniq[dd]] + some_x_min = x_min_main_hor[all_args_uniq[dd]] + some_x_max = x_max_main_hor[all_args_uniq[dd]] + #img_in=np.zeros(separators_closeup_n[:,:,2].shape) #print(img_p_in_ver.shape[1],some_x_max-some_x_min,'xdiff') - diff_x_some=some_x_max-some_x_min + diff_x_some = some_x_max - some_x_min for jv in range(len(some_args)): - img_p_in=cv2.fillPoly(img_in_hor, pts=[contours_lines_hor[some_args[jv]]], color=(1,1,1)) - if any(i_diff>(img_p_in_ver.shape[1]/float(3.3)) for i_diff in diff_x_some): - img_p_in[int(np.mean(some_cy))-5: - int(np.mean(some_cy))+5, - int(np.min(some_x_min)): - int(np.max(some_x_max)) ]=1 - sum_dis=dist_x_hor[some_args].sum() - diff_max_min_uniques=np.max(x_max_main_hor[some_args])-np.min(x_min_main_hor[some_args]) - + img_p_in = cv2.fillPoly(img_in_hor, pts=[contours_lines_hor[some_args[jv]]], color=(1, 1, 1)) + if any(i_diff > (img_p_in_ver.shape[1] / float(3.3)) for i_diff in diff_x_some): + img_p_in[int(np.mean(some_cy)) - 5: + int(np.mean(some_cy)) + 5, + int(np.min(some_x_min)): + int(np.max(some_x_max))] = 1 + sum_dis = dist_x_hor[some_args].sum() + diff_max_min_uniques = np.max(x_max_main_hor[some_args]) - np.min(x_min_main_hor[some_args]) + if (diff_max_min_uniques > sum_dis and - sum_dis / float(diff_max_min_uniques) > 0.85 and - diff_max_min_uniques / float(img_p_in_ver.shape[1]) > 0.85 and - np.std(dist_x_hor[some_args]) < 0.55 * np.mean(dist_x_hor[some_args])): + sum_dis / float(diff_max_min_uniques) > 0.85 and + diff_max_min_uniques / float(img_p_in_ver.shape[1]) > 0.85 and + np.std(dist_x_hor[some_args]) < 0.55 * np.mean(dist_x_hor[some_args])): # print(dist_x_hor[some_args], # dist_x_hor[some_args].sum(), # np.min(x_min_main_hor[some_args]), @@ -1365,28 +1372,28 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new( # np.var( dist_x_hor[some_args] ),'jalibdiha') special_separators.append(np.mean(cy_main_hor[some_args])) else: - img_p_in=img_in_hor - special_separators=[] + img_p_in = img_in_hor + special_separators = [] else: - img_p_in=img_in_hor - special_separators=[] - - img_p_in_ver[:,:,0][img_p_in_ver[:,:,0]==255]=1 - sep_ver_hor=img_p_in+img_p_in_ver - sep_ver_hor_cross=(sep_ver_hor[:,:,0]==2)*1 - sep_ver_hor_cross=np.repeat(sep_ver_hor_cross[:, :, np.newaxis], 3, axis=2) - sep_ver_hor_cross=sep_ver_hor_cross.astype(np.uint8) + img_p_in = img_in_hor + special_separators = [] + + img_p_in_ver[:, :, 0][img_p_in_ver[:, :, 0] == 255] = 1 + sep_ver_hor = img_p_in + img_p_in_ver + sep_ver_hor_cross = (sep_ver_hor[:, :, 0] == 2) * 1 + sep_ver_hor_cross = np.repeat(sep_ver_hor_cross[:, :, np.newaxis], 3, axis=2) + sep_ver_hor_cross = sep_ver_hor_cross.astype(np.uint8) imgray = cv2.cvtColor(sep_ver_hor_cross, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_cross,_=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - cx_cross,cy_cross ,_ , _, _ ,_,_=find_new_features_of_contours(contours_cross) + contours_cross, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + cx_cross, cy_cross, _, _, _, _, _ = find_new_features_of_contours(contours_cross) for ii in range(len(cx_cross)): - img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])+5:int(cx_cross[ii])+40,0]=0 - img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])-40:int(cx_cross[ii])-4,0]=0 + img_p_in[int(cy_cross[ii]) - 30:int(cy_cross[ii]) + 30, int(cx_cross[ii]) + 5:int(cx_cross[ii]) + 40, 0] = 0 + img_p_in[int(cy_cross[ii]) - 30:int(cy_cross[ii]) + 30, int(cx_cross[ii]) - 40:int(cx_cross[ii]) - 4, 0] = 0 else: - img_p_in=np.copy(img_in_hor) - special_separators=[] - return img_p_in[:,:,0], special_separators + img_p_in = np.copy(img_in_hor) + special_separators = [] + return img_p_in[:, :, 0], special_separators def return_points_with_boundies(peaks_neg_fin, first_point, last_point): @@ -1399,45 +1406,45 @@ def return_points_with_boundies(peaks_neg_fin, first_point, last_point): def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, pixel_lines, contours_h=None): t_ins_c0 = time.time() - separators_closeup= (region_pre_p[:, :, :] == pixel_lines) * 1 - separators_closeup[0:110,:,:]=0 - separators_closeup[separators_closeup.shape[0]-150:,:,:]=0 - - kernel = np.ones((5,5),np.uint8) - separators_closeup=separators_closeup.astype(np.uint8) - separators_closeup = cv2.dilate(separators_closeup,kernel,iterations = 1) - separators_closeup = cv2.erode(separators_closeup,kernel,iterations = 1) - - separators_closeup_new=np.zeros((separators_closeup.shape[0] ,separators_closeup.shape[1] )) - separators_closeup_n=np.copy(separators_closeup) - separators_closeup_n=separators_closeup_n.astype(np.uint8) - - separators_closeup_n_binary=np.zeros(( separators_closeup_n.shape[0],separators_closeup_n.shape[1]) ) - separators_closeup_n_binary[:,:]=separators_closeup_n[:,:,0] - separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]!=0]=1 - - gray_early=np.repeat(separators_closeup_n_binary[:, :, np.newaxis], 3, axis=2) - gray_early=gray_early.astype(np.uint8) + separators_closeup = (region_pre_p[:, :, :] == pixel_lines) * 1 + separators_closeup[0:110, :, :] = 0 + separators_closeup[separators_closeup.shape[0] - 150:, :, :] = 0 + + kernel = np.ones((5, 5), np.uint8) + separators_closeup = separators_closeup.astype(np.uint8) + separators_closeup = cv2.dilate(separators_closeup, kernel, iterations=1) + separators_closeup = cv2.erode(separators_closeup, kernel, iterations=1) + + separators_closeup_new = np.zeros((separators_closeup.shape[0], separators_closeup.shape[1])) + separators_closeup_n = np.copy(separators_closeup) + separators_closeup_n = separators_closeup_n.astype(np.uint8) + + separators_closeup_n_binary = np.zeros((separators_closeup_n.shape[0], separators_closeup_n.shape[1])) + separators_closeup_n_binary[:, :] = separators_closeup_n[:, :, 0] + separators_closeup_n_binary[:, :][separators_closeup_n_binary[:, :] != 0] = 1 + + gray_early = np.repeat(separators_closeup_n_binary[:, :, np.newaxis], 3, axis=2) + gray_early = gray_early.astype(np.uint8) imgray_e = cv2.cvtColor(gray_early, cv2.COLOR_BGR2GRAY) ret_e, thresh_e = cv2.threshold(imgray_e, 0, 255, 0) - - contours_line_e,hierarchy_e=cv2.findContours(thresh_e,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + + contours_line_e, hierarchy_e = cv2.findContours(thresh_e, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) _, dist_xe, _, _, _, _, y_min_main, y_max_main, _ = \ find_features_of_lines(contours_line_e) dist_ye = y_max_main - y_min_main - args_e=np.arange(len(contours_line_e)) - args_hor_e=args_e[(dist_ye<=50) & - (dist_xe>=3*dist_ye)] - cnts_hor_e=[] + args_e = np.arange(len(contours_line_e)) + args_hor_e = args_e[(dist_ye <= 50) & + (dist_xe >= 3 * dist_ye)] + cnts_hor_e = [] for ce in args_hor_e: cnts_hor_e.append(contours_line_e[ce]) - figs_e=np.zeros(thresh_e.shape) - figs_e=cv2.fillPoly(figs_e,pts=cnts_hor_e,color=(1,1,1)) - - separators_closeup_n_binary=cv2.fillPoly(separators_closeup_n_binary, pts=cnts_hor_e, color=(0,0,0)) + figs_e = np.zeros(thresh_e.shape) + figs_e = cv2.fillPoly(figs_e, pts=cnts_hor_e, color=(1, 1, 1)) + + separators_closeup_n_binary = cv2.fillPoly(separators_closeup_n_binary, pts=cnts_hor_e, color=(0, 0, 0)) gray = cv2.bitwise_not(separators_closeup_n_binary) - gray=gray.astype(np.uint8) - + gray = gray.astype(np.uint8) + bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15, -2) horizontal = np.copy(bw) vertical = np.copy(bw) @@ -1450,11 +1457,11 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, horizontal = cv2.erode(horizontal, horizontalStructure) horizontal = cv2.dilate(horizontal, horizontalStructure) - kernel = np.ones((5,5),np.uint8) - horizontal = cv2.dilate(horizontal,kernel,iterations = 2) - horizontal = cv2.erode(horizontal,kernel,iterations = 2) - horizontal = cv2.fillPoly(horizontal, pts=cnts_hor_e, color=(255,255,255)) - + kernel = np.ones((5, 5), np.uint8) + horizontal = cv2.dilate(horizontal, kernel, iterations=2) + horizontal = cv2.erode(horizontal, kernel, iterations=2) + horizontal = cv2.fillPoly(horizontal, pts=cnts_hor_e, color=(255, 255, 255)) + rows = vertical.shape[0] verticalsize = rows // 30 # Create structure element for extracting vertical lines through morphology operations @@ -1462,165 +1469,164 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, # Apply morphology operations vertical = cv2.erode(vertical, verticalStructure) vertical = cv2.dilate(vertical, verticalStructure) - vertical = cv2.dilate(vertical,kernel,iterations = 1) + vertical = cv2.dilate(vertical, kernel, iterations=1) horizontal, special_separators = \ combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new( vertical, horizontal, num_col_classifier) - - separators_closeup_new[:,:][vertical[:,:]!=0]=1 - separators_closeup_new[:,:][horizontal[:,:]!=0]=1 - - vertical=np.repeat(vertical[:, :, np.newaxis], 3, axis=2) - vertical=vertical.astype(np.uint8) - + + separators_closeup_new[:, :][vertical[:, :] != 0] = 1 + separators_closeup_new[:, :][horizontal[:, :] != 0] = 1 + + vertical = np.repeat(vertical[:, :, np.newaxis], 3, axis=2) + vertical = vertical.astype(np.uint8) + imgray = cv2.cvtColor(vertical, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - - contours_line_vers,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + + contours_line_vers, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = \ find_features_of_lines(contours_line_vers) - args=np.arange(len(slope_lines)) - args_ver=args[slope_lines==1] - dist_x_ver=dist_x[slope_lines==1] - y_min_main_ver=y_min_main[slope_lines==1] - y_max_main_ver=y_max_main[slope_lines==1] - x_min_main_ver=x_min_main[slope_lines==1] - x_max_main_ver=x_max_main[slope_lines==1] - cx_main_ver=cx_main[slope_lines==1] - dist_y_ver=y_max_main_ver-y_min_main_ver - len_y=separators_closeup.shape[0]/3.0 - - horizontal=np.repeat(horizontal[:, :, np.newaxis], 3, axis=2) - horizontal=horizontal.astype(np.uint8) + args = np.arange(len(slope_lines)) + args_ver = args[slope_lines == 1] + dist_x_ver = dist_x[slope_lines == 1] + y_min_main_ver = y_min_main[slope_lines == 1] + y_max_main_ver = y_max_main[slope_lines == 1] + x_min_main_ver = x_min_main[slope_lines == 1] + x_max_main_ver = x_max_main[slope_lines == 1] + cx_main_ver = cx_main[slope_lines == 1] + dist_y_ver = y_max_main_ver - y_min_main_ver + len_y = separators_closeup.shape[0] / 3.0 + + horizontal = np.repeat(horizontal[:, :, np.newaxis], 3, axis=2) + horizontal = horizontal.astype(np.uint8) imgray = cv2.cvtColor(horizontal, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_line_hors,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + contours_line_hors, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = \ find_features_of_lines(contours_line_hors) - - slope_lines_org_hor=slope_lines_org[slope_lines==0] - args=np.arange(len(slope_lines)) - len_x=separators_closeup.shape[1]/5.0 - dist_y=np.abs(y_max_main-y_min_main) - - args_hor=args[slope_lines==0] - dist_x_hor=dist_x[slope_lines==0] - y_min_main_hor=y_min_main[slope_lines==0] - y_max_main_hor=y_max_main[slope_lines==0] - x_min_main_hor=x_min_main[slope_lines==0] - x_max_main_hor=x_max_main[slope_lines==0] - dist_y_hor=dist_y[slope_lines==0] - cy_main_hor=cy_main[slope_lines==0] - - args_hor=args_hor[dist_x_hor>=len_x/2.0] - x_max_main_hor=x_max_main_hor[dist_x_hor>=len_x/2.0] - x_min_main_hor=x_min_main_hor[dist_x_hor>=len_x/2.0] - cy_main_hor=cy_main_hor[dist_x_hor>=len_x/2.0] - y_min_main_hor=y_min_main_hor[dist_x_hor>=len_x/2.0] - y_max_main_hor=y_max_main_hor[dist_x_hor>=len_x/2.0] - dist_y_hor=dist_y_hor[dist_x_hor>=len_x/2.0] - slope_lines_org_hor=slope_lines_org_hor[dist_x_hor>=len_x/2.0] - dist_x_hor=dist_x_hor[dist_x_hor>=len_x/2.0] - - matrix_of_lines_ch=np.zeros((len(cy_main_hor)+len(cx_main_ver),10)) - matrix_of_lines_ch[:len(cy_main_hor),0]=args_hor - matrix_of_lines_ch[len(cy_main_hor):,0]=args_ver - matrix_of_lines_ch[len(cy_main_hor):,1]=cx_main_ver - matrix_of_lines_ch[:len(cy_main_hor),2]=x_min_main_hor+50#x_min_main_hor+150 - matrix_of_lines_ch[len(cy_main_hor):,2]=x_min_main_ver - matrix_of_lines_ch[:len(cy_main_hor),3]=x_max_main_hor-50#x_max_main_hor-150 - matrix_of_lines_ch[len(cy_main_hor):,3]=x_max_main_ver - matrix_of_lines_ch[:len(cy_main_hor),4]=dist_x_hor - matrix_of_lines_ch[len(cy_main_hor):,4]=dist_x_ver - matrix_of_lines_ch[:len(cy_main_hor),5]=cy_main_hor - matrix_of_lines_ch[:len(cy_main_hor),6]=y_min_main_hor - matrix_of_lines_ch[len(cy_main_hor):,6]=y_min_main_ver - matrix_of_lines_ch[:len(cy_main_hor),7]=y_max_main_hor - matrix_of_lines_ch[len(cy_main_hor):,7]=y_max_main_ver - matrix_of_lines_ch[:len(cy_main_hor),8]=dist_y_hor - matrix_of_lines_ch[len(cy_main_hor):,8]=dist_y_ver - matrix_of_lines_ch[len(cy_main_hor):,9]=1 - + + slope_lines_org_hor = slope_lines_org[slope_lines == 0] + args = np.arange(len(slope_lines)) + len_x = separators_closeup.shape[1] / 5.0 + dist_y = np.abs(y_max_main - y_min_main) + + args_hor = args[slope_lines == 0] + dist_x_hor = dist_x[slope_lines == 0] + y_min_main_hor = y_min_main[slope_lines == 0] + y_max_main_hor = y_max_main[slope_lines == 0] + x_min_main_hor = x_min_main[slope_lines == 0] + x_max_main_hor = x_max_main[slope_lines == 0] + dist_y_hor = dist_y[slope_lines == 0] + cy_main_hor = cy_main[slope_lines == 0] + + args_hor = args_hor[dist_x_hor >= len_x / 2.0] + x_max_main_hor = x_max_main_hor[dist_x_hor >= len_x / 2.0] + x_min_main_hor = x_min_main_hor[dist_x_hor >= len_x / 2.0] + cy_main_hor = cy_main_hor[dist_x_hor >= len_x / 2.0] + y_min_main_hor = y_min_main_hor[dist_x_hor >= len_x / 2.0] + y_max_main_hor = y_max_main_hor[dist_x_hor >= len_x / 2.0] + dist_y_hor = dist_y_hor[dist_x_hor >= len_x / 2.0] + slope_lines_org_hor = slope_lines_org_hor[dist_x_hor >= len_x / 2.0] + dist_x_hor = dist_x_hor[dist_x_hor >= len_x / 2.0] + + matrix_of_lines_ch = np.zeros((len(cy_main_hor) + len(cx_main_ver), 10)) + matrix_of_lines_ch[:len(cy_main_hor), 0] = args_hor + matrix_of_lines_ch[len(cy_main_hor):, 0] = args_ver + matrix_of_lines_ch[len(cy_main_hor):, 1] = cx_main_ver + matrix_of_lines_ch[:len(cy_main_hor), 2] = x_min_main_hor + 50 #x_min_main_hor+150 + matrix_of_lines_ch[len(cy_main_hor):, 2] = x_min_main_ver + matrix_of_lines_ch[:len(cy_main_hor), 3] = x_max_main_hor - 50 #x_max_main_hor-150 + matrix_of_lines_ch[len(cy_main_hor):, 3] = x_max_main_ver + matrix_of_lines_ch[:len(cy_main_hor), 4] = dist_x_hor + matrix_of_lines_ch[len(cy_main_hor):, 4] = dist_x_ver + matrix_of_lines_ch[:len(cy_main_hor), 5] = cy_main_hor + matrix_of_lines_ch[:len(cy_main_hor), 6] = y_min_main_hor + matrix_of_lines_ch[len(cy_main_hor):, 6] = y_min_main_ver + matrix_of_lines_ch[:len(cy_main_hor), 7] = y_max_main_hor + matrix_of_lines_ch[len(cy_main_hor):, 7] = y_max_main_ver + matrix_of_lines_ch[:len(cy_main_hor), 8] = dist_y_hor + matrix_of_lines_ch[len(cy_main_hor):, 8] = dist_y_ver + matrix_of_lines_ch[len(cy_main_hor):, 9] = 1 + if contours_h is not None: _, dist_x_head, x_min_main_head, x_max_main_head, cy_main_head, _, y_min_main_head, y_max_main_head, _ = \ find_features_of_lines(contours_h) - matrix_l_n=np.zeros((matrix_of_lines_ch.shape[0]+len(cy_main_head),matrix_of_lines_ch.shape[1])) - matrix_l_n[:matrix_of_lines_ch.shape[0],:]=np.copy(matrix_of_lines_ch[:,:]) - args_head=np.arange(len(cy_main_head)) + len(cy_main_hor) - - matrix_l_n[matrix_of_lines_ch.shape[0]:,0]=args_head - matrix_l_n[matrix_of_lines_ch.shape[0]:,2]=x_min_main_head+30 - matrix_l_n[matrix_of_lines_ch.shape[0]:,3]=x_max_main_head-30 - matrix_l_n[matrix_of_lines_ch.shape[0]:,4]=dist_x_head - matrix_l_n[matrix_of_lines_ch.shape[0]:,5]=y_min_main_head-3-8 - matrix_l_n[matrix_of_lines_ch.shape[0]:,6]=y_min_main_head-5-8 - matrix_l_n[matrix_of_lines_ch.shape[0]:,7]=y_max_main_head#y_min_main_head+1-8 - matrix_l_n[matrix_of_lines_ch.shape[0]:,8]=4 - matrix_of_lines_ch=np.copy(matrix_l_n) - - cy_main_splitters=cy_main_hor[(x_min_main_hor<=.16*region_pre_p.shape[1]) & - (x_max_main_hor>=.84*region_pre_p.shape[1])] - cy_main_splitters=np.array( list(cy_main_splitters)+list(special_separators)) + matrix_l_n = np.zeros((matrix_of_lines_ch.shape[0] + len(cy_main_head), matrix_of_lines_ch.shape[1])) + matrix_l_n[:matrix_of_lines_ch.shape[0], :] = np.copy(matrix_of_lines_ch[:, :]) + args_head = np.arange(len(cy_main_head)) + len(cy_main_hor) + + matrix_l_n[matrix_of_lines_ch.shape[0]:, 0] = args_head + matrix_l_n[matrix_of_lines_ch.shape[0]:, 2] = x_min_main_head + 30 + matrix_l_n[matrix_of_lines_ch.shape[0]:, 3] = x_max_main_head - 30 + matrix_l_n[matrix_of_lines_ch.shape[0]:, 4] = dist_x_head + matrix_l_n[matrix_of_lines_ch.shape[0]:, 5] = y_min_main_head - 3 - 8 + matrix_l_n[matrix_of_lines_ch.shape[0]:, 6] = y_min_main_head - 5 - 8 + matrix_l_n[matrix_of_lines_ch.shape[0]:, 7] = y_max_main_head #y_min_main_head+1-8 + matrix_l_n[matrix_of_lines_ch.shape[0]:, 8] = 4 + matrix_of_lines_ch = np.copy(matrix_l_n) + + cy_main_splitters = cy_main_hor[(x_min_main_hor <= .16 * region_pre_p.shape[1]) & + (x_max_main_hor >= .84 * region_pre_p.shape[1])] + cy_main_splitters = np.array(list(cy_main_splitters) + list(special_separators)) if contours_h is not None: try: - cy_main_splitters_head=cy_main_head[(x_min_main_head<=.16*region_pre_p.shape[1]) & - (x_max_main_head>=.84*region_pre_p.shape[1])] - cy_main_splitters=np.array( list(cy_main_splitters)+list(cy_main_splitters_head)) + cy_main_splitters_head = cy_main_head[(x_min_main_head <= .16 * region_pre_p.shape[1]) & + (x_max_main_head >= .84 * region_pre_p.shape[1])] + cy_main_splitters = np.array(list(cy_main_splitters) + list(cy_main_splitters_head)) except: pass - args_cy_splitter=np.argsort(cy_main_splitters) - cy_main_splitters_sort=cy_main_splitters[args_cy_splitter] - - splitter_y_new= [0] + args_cy_splitter = np.argsort(cy_main_splitters) + cy_main_splitters_sort = cy_main_splitters[args_cy_splitter] + + splitter_y_new = [0] for i in range(len(cy_main_splitters_sort)): - splitter_y_new.append( cy_main_splitters_sort[i] ) + splitter_y_new.append(cy_main_splitters_sort[i]) splitter_y_new.append(region_pre_p.shape[0]) - splitter_y_new_diff=np.diff(splitter_y_new)/float(region_pre_p.shape[0])*100 - - args_big_parts=np.arange(len(splitter_y_new_diff))[ splitter_y_new_diff>22 ] - - regions_without_separators=return_regions_without_separators(region_pre_p) - length_y_threshold=regions_without_separators.shape[0]/4.0 - - num_col_fin=0 - peaks_neg_fin_fin=[] + splitter_y_new_diff = np.diff(splitter_y_new) / float(region_pre_p.shape[0]) * 100 + + args_big_parts = np.arange(len(splitter_y_new_diff))[splitter_y_new_diff > 22] + + regions_without_separators = return_regions_without_separators(region_pre_p) + length_y_threshold = regions_without_separators.shape[0] / 4.0 + + num_col_fin = 0 + peaks_neg_fin_fin = [] for itiles in args_big_parts: - regions_without_separators_tile=regions_without_separators[int(splitter_y_new[itiles]): - int(splitter_y_new[itiles+1]),:,0] + regions_without_separators_tile = regions_without_separators[int(splitter_y_new[itiles]): + int(splitter_y_new[itiles + 1]), :, 0] try: num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables, multiplier=7.0) except: num_col = 0 peaks_neg_fin = [] - if num_col>num_col_fin: - num_col_fin=num_col - peaks_neg_fin_fin=peaks_neg_fin - - if len(args_big_parts)==1 and (len(peaks_neg_fin_fin)+1)=500] - peaks_neg_fin=peaks_neg_fin[peaks_neg_fin<=(vertical.shape[1]-500)] - peaks_neg_fin_fin=peaks_neg_fin[:] - - return num_col_fin, peaks_neg_fin_fin,matrix_of_lines_ch,splitter_y_new,separators_closeup_n - + if num_col > num_col_fin: + num_col_fin = num_col + peaks_neg_fin_fin = peaks_neg_fin + + if len(args_big_parts) == 1 and (len(peaks_neg_fin_fin) + 1) < num_col_classifier: + peaks_neg_fin = find_num_col_by_vertical_lines(vertical) + peaks_neg_fin = peaks_neg_fin[peaks_neg_fin >= 500] + peaks_neg_fin = peaks_neg_fin[peaks_neg_fin <= (vertical.shape[1] - 500)] + peaks_neg_fin_fin = peaks_neg_fin[:] + + return num_col_fin, peaks_neg_fin_fin, matrix_of_lines_ch, splitter_y_new, separators_closeup_n + def return_boxes_of_images_by_order_of_reading_new( splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, tables, right2left_readingorder): - if right2left_readingorder: - regions_without_separators = cv2.flip(regions_without_separators,1) - boxes=[] + regions_without_separators = cv2.flip(regions_without_separators, 1) + boxes = [] peaks_neg_tot_tables = [] - for i in range(len(splitter_y_new)-1): + for i in range(len(splitter_y_new) - 1): #print(splitter_y_new[i],splitter_y_new[i+1]) - matrix_new = matrix_of_lines_ch[:,:][(matrix_of_lines_ch[:,6]> splitter_y_new[i] ) & - (matrix_of_lines_ch[:,7]< splitter_y_new[i+1] )] + matrix_new = matrix_of_lines_ch[:, :][(matrix_of_lines_ch[:, 6] > splitter_y_new[i]) & + (matrix_of_lines_ch[:, 7] < splitter_y_new[i + 1])] #print(len( matrix_new[:,9][matrix_new[:,9]==1] )) #print(matrix_new[:,8][matrix_new[:,9]==1],'gaddaaa') # check to see is there any vertical separator to find holes. @@ -1631,32 +1637,32 @@ def return_boxes_of_images_by_order_of_reading_new( try: if erosion_hurts: num_col, peaks_neg_fin = find_num_col( - regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], + regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i + 1]), :], num_col_classifier, tables, multiplier=6.) else: num_col, peaks_neg_fin = find_num_col( - regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], + regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i + 1]), :], num_col_classifier, tables, multiplier=7.) except: - peaks_neg_fin=[] + peaks_neg_fin = [] num_col = 0 try: - peaks_neg_fin_org=np.copy(peaks_neg_fin) - if (len(peaks_neg_fin)+1)=len(peaks_neg_fin2): - peaks_neg_fin=list(np.copy(peaks_neg_fin1)) + peaks_neg_fin2 = [] + + if len(peaks_neg_fin1) >= len(peaks_neg_fin2): + peaks_neg_fin = list(np.copy(peaks_neg_fin1)) else: - peaks_neg_fin=list(np.copy(peaks_neg_fin2)) - peaks_neg_fin=list(np.array(peaks_neg_fin)+peaks_neg_fin_early[i_n]) - - if i_n!=(len(peaks_neg_fin_early)-2): - peaks_neg_fin_rev.append(peaks_neg_fin_early[i_n+1]) + peaks_neg_fin = list(np.copy(peaks_neg_fin2)) + peaks_neg_fin = list(np.array(peaks_neg_fin) + peaks_neg_fin_early[i_n]) + + if i_n != (len(peaks_neg_fin_early) - 2): + peaks_neg_fin_rev.append(peaks_neg_fin_early[i_n + 1]) #print(peaks_neg_fin,'peaks_neg_fin') - peaks_neg_fin_rev=peaks_neg_fin_rev+peaks_neg_fin + peaks_neg_fin_rev = peaks_neg_fin_rev + peaks_neg_fin - if len(peaks_neg_fin_rev)>=len(peaks_neg_fin_org): - peaks_neg_fin=list(np.sort(peaks_neg_fin_rev)) - num_col=len(peaks_neg_fin) + if len(peaks_neg_fin_rev) >= len(peaks_neg_fin_org): + peaks_neg_fin = list(np.sort(peaks_neg_fin_rev)) + num_col = len(peaks_neg_fin) else: - peaks_neg_fin=list(np.copy(peaks_neg_fin_org)) - num_col=len(peaks_neg_fin) - + peaks_neg_fin = list(np.copy(peaks_neg_fin_org)) + num_col = len(peaks_neg_fin) + #print(peaks_neg_fin,'peaks_neg_fin') except: pass #num_col, peaks_neg_fin = find_num_col( # regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], # multiplier=7.0) - x_min_hor_some=matrix_new[:,2][ (matrix_new[:,9]==0) ] - x_max_hor_some=matrix_new[:,3][ (matrix_new[:,9]==0) ] - cy_hor_some=matrix_new[:,5][ (matrix_new[:,9]==0) ] - cy_hor_diff=matrix_new[:,7][ (matrix_new[:,9]==0) ] - arg_org_hor_some=matrix_new[:,0][ (matrix_new[:,9]==0) ] - + x_min_hor_some = matrix_new[:, 2][(matrix_new[:, 9] == 0)] + x_max_hor_some = matrix_new[:, 3][(matrix_new[:, 9] == 0)] + cy_hor_some = matrix_new[:, 5][(matrix_new[:, 9] == 0)] + cy_hor_diff = matrix_new[:, 7][(matrix_new[:, 9] == 0)] + arg_org_hor_some = matrix_new[:, 0][(matrix_new[:, 9] == 0)] + if right2left_readingorder: x_max_hor_some_new = regions_without_separators.shape[1] - x_min_hor_some x_min_hor_some_new = regions_without_separators.shape[1] - x_max_hor_some - x_min_hor_some =list(np.copy(x_min_hor_some_new)) - x_max_hor_some =list(np.copy(x_max_hor_some_new)) - - peaks_neg_tot=return_points_with_boundies(peaks_neg_fin,0, regions_without_separators[:,:].shape[1]) + x_min_hor_some = list(np.copy(x_min_hor_some_new)) + x_max_hor_some = list(np.copy(x_max_hor_some_new)) + + peaks_neg_tot = return_points_with_boundies(peaks_neg_fin, 0, regions_without_separators[:, :].shape[1]) peaks_neg_tot_tables.append(peaks_neg_tot) - + reading_order_type, x_starting, x_ending, y_type_2, y_diff_type_2, \ y_lines_without_mother, x_start_without_mother, x_end_without_mother, there_is_sep_with_child, \ y_lines_with_child_without_mother, x_start_with_child_without_mother, x_end_with_child_without_mother, \ new_main_sep_y = return_x_start_end_mothers_childs_and_type_of_reading_order( - x_min_hor_some, x_max_hor_some, cy_hor_some, peaks_neg_tot, cy_hor_diff) + x_min_hor_some, x_max_hor_some, cy_hor_some, peaks_neg_tot, cy_hor_diff) x_starting = np.array(x_starting) x_ending = np.array(x_ending) y_type_2 = np.array(y_type_2) y_diff_type_2 = np.array(y_diff_type_2) - if ((reading_order_type==1) or - (reading_order_type==0 and - (len(y_lines_without_mother)>=2 or there_is_sep_with_child==1))): + if ((reading_order_type == 1) or + (reading_order_type == 0 and + (len(y_lines_without_mother) >= 2 or there_is_sep_with_child == 1))): try: - y_grenze=int(splitter_y_new[i])+300 + y_grenze = int(splitter_y_new[i]) + 300 #check if there is a big separator in this y_mains_sep_ohne_grenzen - - args_early_ys=np.arange(len(y_type_2)) + + args_early_ys = np.arange(len(y_type_2)) #print(args_early_ys,'args_early_ys') #print(int(splitter_y_new[i]),int(splitter_y_new[i+1])) @@ -1749,39 +1755,39 @@ def return_boxes_of_images_by_order_of_reading_new( args_up = args_early_ys[(y_type_2 > int(splitter_y_new[i])) & (y_type_2 <= y_grenze)] if len(y_type_2_up) > 0: - y_main_separator_up = y_type_2_up [(x_starting_up==0) & - (x_ending_up==(len(peaks_neg_tot)-1) )] - y_diff_main_separator_up = y_diff_type_2_up[(x_starting_up==0) & - (x_ending_up==(len(peaks_neg_tot)-1) )] - args_main_to_deleted = args_up[(x_starting_up==0) & - (x_ending_up==(len(peaks_neg_tot)-1) )] + y_main_separator_up = y_type_2_up[(x_starting_up == 0) & + (x_ending_up == (len(peaks_neg_tot) - 1))] + y_diff_main_separator_up = y_diff_type_2_up[(x_starting_up == 0) & + (x_ending_up == (len(peaks_neg_tot) - 1))] + args_main_to_deleted = args_up[(x_starting_up == 0) & + (x_ending_up == (len(peaks_neg_tot) - 1))] #print(y_main_separator_up,y_diff_main_separator_up,args_main_to_deleted,'fffffjammmm') if len(y_diff_main_separator_up) > 0: - args_to_be_kept = np.array(list( set(args_early_ys) - set(args_main_to_deleted) )) + args_to_be_kept = np.array(list(set(args_early_ys) - set(args_main_to_deleted))) #print(args_to_be_kept,'args_to_be_kept') - boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1], - int(splitter_y_new[i]), int( np.max(y_diff_main_separator_up))]) - splitter_y_new[i]=[ np.max(y_diff_main_separator_up) ][0] - + boxes.append([0, peaks_neg_tot[len(peaks_neg_tot) - 1], + int(splitter_y_new[i]), int(np.max(y_diff_main_separator_up))]) + splitter_y_new[i] = [np.max(y_diff_main_separator_up)][0] + #print(splitter_y_new[i],'splitter_y_new[i]') y_type_2 = y_type_2[args_to_be_kept] x_starting = x_starting[args_to_be_kept] x_ending = x_ending[args_to_be_kept] y_diff_type_2 = y_diff_type_2[args_to_be_kept] - + #print('galdiha') - y_grenze=int(splitter_y_new[i])+200 - args_early_ys2=np.arange(len(y_type_2)) - y_type_2_up=y_type_2[(y_type_2 > int(splitter_y_new[i])) & - (y_type_2 <= y_grenze)] - x_starting_up=x_starting[(y_type_2 > int(splitter_y_new[i])) & - (y_type_2 <= y_grenze)] - x_ending_up=x_ending[(y_type_2 > int(splitter_y_new[i])) & - (y_type_2 <= y_grenze)] - y_diff_type_2_up=y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) & - (y_type_2 <= y_grenze)] - args_up2=args_early_ys2[(y_type_2 > int(splitter_y_new[i])) & - (y_type_2 <= y_grenze)] + y_grenze = int(splitter_y_new[i]) + 200 + args_early_ys2 = np.arange(len(y_type_2)) + y_type_2_up = y_type_2[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + x_starting_up = x_starting[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + x_ending_up = x_ending[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + y_diff_type_2_up = y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] + args_up2 = args_early_ys2[(y_type_2 > int(splitter_y_new[i])) & + (y_type_2 <= y_grenze)] #print(y_type_2_up,x_starting_up,x_ending_up,'didid') nodes_in = [] for ij in range(len(x_starting_up)): @@ -1789,16 +1795,16 @@ def return_boxes_of_images_by_order_of_reading_new( x_ending_up[ij])) nodes_in = np.unique(nodes_in) #print(nodes_in,'nodes_in') - - if set(nodes_in)==set(range(len(peaks_neg_tot)-1)): + + if set(nodes_in) == set(range(len(peaks_neg_tot) - 1)): pass - elif set(nodes_in)==set(range(1, len(peaks_neg_tot)-1)): + elif set(nodes_in) == set(range(1, len(peaks_neg_tot) - 1)): pass else: #print('burdaydikh') - args_to_be_kept2=np.array(list( set(args_early_ys2)-set(args_up2) )) - - if len(args_to_be_kept2)>0: + args_to_be_kept2 = np.array(list(set(args_early_ys2) - set(args_up2))) + + if len(args_to_be_kept2) > 0: y_type_2 = y_type_2[args_to_be_kept2] x_starting = x_starting[args_to_be_kept2] x_ending = x_ending[args_to_be_kept2] @@ -1806,7 +1812,7 @@ def return_boxes_of_images_by_order_of_reading_new( else: pass #print('burdaydikh2') - elif len(y_diff_main_separator_up)==0: + elif len(y_diff_main_separator_up) == 0: nodes_in = [] for ij in range(len(x_starting_up)): nodes_in = nodes_in + list(range(x_starting_up[ij], @@ -1814,20 +1820,20 @@ def return_boxes_of_images_by_order_of_reading_new( nodes_in = np.unique(nodes_in) #print(nodes_in,'nodes_in2') #print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))') - - if set(nodes_in)==set(range(len(peaks_neg_tot)-1)): + + if set(nodes_in) == set(range(len(peaks_neg_tot) - 1)): pass - elif set(nodes_in)==set(range(1,len(peaks_neg_tot)-1)): + elif set(nodes_in) == set(range(1, len(peaks_neg_tot) - 1)): pass else: #print('burdaydikh') #print(args_early_ys,'args_early_ys') #print(args_up,'args_up') - args_to_be_kept2=np.array(list( set(args_early_ys) - set(args_up) )) - + args_to_be_kept2 = np.array(list(set(args_early_ys) - set(args_up))) + #print(args_to_be_kept2,'args_to_be_kept2') #print(len(y_type_2),len(x_starting),len(x_ending),len(y_diff_type_2)) - if len(args_to_be_kept2)>0: + if len(args_to_be_kept2) > 0: y_type_2 = y_type_2[args_to_be_kept2] x_starting = x_starting[args_to_be_kept2] x_ending = x_ending[args_to_be_kept2] @@ -1835,28 +1841,30 @@ def return_boxes_of_images_by_order_of_reading_new( else: pass #print('burdaydikh2') - + #int(splitter_y_new[i]) - y_lines_by_order=[] - x_start_by_order=[] - x_end_by_order=[] - if (len(x_end_with_child_without_mother)==0 and reading_order_type==0) or reading_order_type==1: - if reading_order_type==1: + y_lines_by_order = [] + x_start_by_order = [] + x_end_by_order = [] + if ( + len(x_end_with_child_without_mother) == 0 and reading_order_type == 0) or reading_order_type == 1: + if reading_order_type == 1: y_lines_by_order.append(int(splitter_y_new[i])) x_start_by_order.append(0) - x_end_by_order.append(len(peaks_neg_tot)-2) + x_end_by_order.append(len(peaks_neg_tot) - 2) else: #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') columns_covered_by_mothers = [] for dj in range(len(x_start_without_mother)): columns_covered_by_mothers = columns_covered_by_mothers + \ - list(range(x_start_without_mother[dj], - x_end_without_mother[dj])) + list(range(x_start_without_mother[dj], + x_end_without_mother[dj])) columns_covered_by_mothers = list(set(columns_covered_by_mothers)) - all_columns=np.arange(len(peaks_neg_tot)-1) - columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers)) - y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) + all_columns = np.arange(len(peaks_neg_tot) - 1) + columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers)) + y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * ( + len(columns_not_covered) + len(x_start_without_mother))) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) x_starting = np.append(x_starting, columns_not_covered) @@ -1864,58 +1872,60 @@ def return_boxes_of_images_by_order_of_reading_new( x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) x_ending = np.append(x_ending, x_end_without_mother) - ind_args=np.arange(len(y_type_2)) + ind_args = np.arange(len(y_type_2)) #ind_args=np.array(ind_args) #print(ind_args,'ind_args') - for column in range(len(peaks_neg_tot)-1): + for column in range(len(peaks_neg_tot) - 1): #print(column,'column') - ind_args_in_col=ind_args[x_starting==column] + ind_args_in_col = ind_args[x_starting == column] #print('babali2') #print(ind_args_in_col,'ind_args_in_col') - ind_args_in_col=np.array(ind_args_in_col) + ind_args_in_col = np.array(ind_args_in_col) #print(len(y_type_2)) - y_column=y_type_2[ind_args_in_col] - x_start_column=x_starting[ind_args_in_col] - x_end_column=x_ending[ind_args_in_col] + y_column = y_type_2[ind_args_in_col] + x_start_column = x_starting[ind_args_in_col] + x_end_column = x_ending[ind_args_in_col] #print('babali3') - ind_args_col_sorted=np.argsort(y_column) - y_col_sort=y_column[ind_args_col_sorted] - x_start_column_sort=x_start_column[ind_args_col_sorted] - x_end_column_sort=x_end_column[ind_args_col_sorted] + ind_args_col_sorted = np.argsort(y_column) + y_col_sort = y_column[ind_args_col_sorted] + x_start_column_sort = x_start_column[ind_args_col_sorted] + x_end_column_sort = x_end_column[ind_args_col_sorted] #print('babali4') for ii in range(len(y_col_sort)): #print('babali5') y_lines_by_order.append(y_col_sort[ii]) x_start_by_order.append(x_start_column_sort[ii]) - x_end_by_order.append(x_end_column_sort[ii]-1) + x_end_by_order.append(x_end_column_sort[ii] - 1) else: #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') columns_covered_by_mothers = [] for dj in range(len(x_start_without_mother)): columns_covered_by_mothers = columns_covered_by_mothers + \ - list(range(x_start_without_mother[dj], - x_end_without_mother[dj])) + list(range(x_start_without_mother[dj], + x_end_without_mother[dj])) columns_covered_by_mothers = list(set(columns_covered_by_mothers)) - - all_columns=np.arange(len(peaks_neg_tot)-1) - columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers)) - y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) + + all_columns = np.arange(len(peaks_neg_tot) - 1) + columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers)) + y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * ( + len(columns_not_covered) + len(x_start_without_mother))) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) x_starting = np.append(x_starting, columns_not_covered) x_starting = np.append(x_starting, x_start_without_mother) x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) x_ending = np.append(x_ending, x_end_without_mother) - + columns_covered_by_with_child_no_mothers = [] for dj in range(len(x_end_with_child_without_mother)): columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \ - list(range(x_start_with_child_without_mother[dj], - x_end_with_child_without_mother[dj])) + list(range(x_start_with_child_without_mother[dj], + x_end_with_child_without_mother[dj])) columns_covered_by_with_child_no_mothers = list(set(columns_covered_by_with_child_no_mothers)) - - all_columns = np.arange(len(peaks_neg_tot)-1) - columns_not_covered_child_no_mother = list(set(all_columns) - set(columns_covered_by_with_child_no_mothers)) + + all_columns = np.arange(len(peaks_neg_tot) - 1) + columns_not_covered_child_no_mother = list( + set(all_columns) - set(columns_covered_by_with_child_no_mothers)) #indexes_to_be_spanned=[] for i_s in range(len(x_end_with_child_without_mother)): columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s]) @@ -1925,165 +1935,178 @@ def return_boxes_of_images_by_order_of_reading_new( x_start_with_child_without_mother = np.array(x_start_with_child_without_mother) for i_s_nc in columns_not_covered_child_no_mother: if i_s_nc in x_start_with_child_without_mother: - x_end_biggest_column = x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0] - args_all_biggest_lines = ind_args[(x_starting==i_s_nc) & - (x_ending==x_end_biggest_column)] + x_end_biggest_column = \ + x_end_with_child_without_mother[x_start_with_child_without_mother == i_s_nc][0] + args_all_biggest_lines = ind_args[(x_starting == i_s_nc) & + (x_ending == x_end_biggest_column)] y_column_nc = y_type_2[args_all_biggest_lines] x_start_column_nc = x_starting[args_all_biggest_lines] x_end_column_nc = x_ending[args_all_biggest_lines] y_column_nc = np.sort(y_column_nc) for i_c in range(len(y_column_nc)): - if i_c==(len(y_column_nc)-1): - ind_all_lines_between_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) & - (y_type_2=i_s_nc) & - (x_ending<=x_end_biggest_column)] + if i_c == (len(y_column_nc) - 1): + ind_all_lines_between_nm_wc = ind_args[(y_type_2 > y_column_nc[i_c]) & + (y_type_2 < int(splitter_y_new[i + 1])) & + (x_starting >= i_s_nc) & + (x_ending <= x_end_biggest_column)] else: - ind_all_lines_between_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) & - (y_type_2=i_s_nc) & - (x_ending<=x_end_biggest_column)] + ind_all_lines_between_nm_wc = ind_args[(y_type_2 > y_column_nc[i_c]) & + (y_type_2 < y_column_nc[i_c + 1]) & + (x_starting >= i_s_nc) & + (x_ending <= x_end_biggest_column)] y_all_between_nm_wc = y_type_2[ind_all_lines_between_nm_wc] x_starting_all_between_nm_wc = x_starting[ind_all_lines_between_nm_wc] x_ending_all_between_nm_wc = x_ending[ind_all_lines_between_nm_wc] x_diff_all_between_nm_wc = x_ending_all_between_nm_wc - x_starting_all_between_nm_wc - if len(x_diff_all_between_nm_wc)>0: - biggest=np.argmax(x_diff_all_between_nm_wc) - + if len(x_diff_all_between_nm_wc) > 0: + biggest = np.argmax(x_diff_all_between_nm_wc) + columns_covered_by_mothers = [] for dj in range(len(x_starting_all_between_nm_wc)): columns_covered_by_mothers = columns_covered_by_mothers + \ - list(range(x_starting_all_between_nm_wc[dj], - x_ending_all_between_nm_wc[dj])) + list(range(x_starting_all_between_nm_wc[dj], + x_ending_all_between_nm_wc[dj])) columns_covered_by_mothers = list(set(columns_covered_by_mothers)) - - all_columns=np.arange(i_s_nc, x_end_biggest_column) + + all_columns = np.arange(i_s_nc, x_end_biggest_column) columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers)) - - should_longest_line_be_extended=0 + + should_longest_line_be_extended = 0 if (len(x_diff_all_between_nm_wc) > 0 and - set(list(range(x_starting_all_between_nm_wc[biggest], - x_ending_all_between_nm_wc[biggest])) + - list(columns_not_covered)) != set(all_columns)): - should_longest_line_be_extended=1 + set(list(range(x_starting_all_between_nm_wc[biggest], + x_ending_all_between_nm_wc[biggest])) + + list(columns_not_covered)) != set(all_columns)): + should_longest_line_be_extended = 1 index_lines_so_close_to_top_separator = \ - np.arange(len(y_all_between_nm_wc))[(y_all_between_nm_wc>y_column_nc[i_c]) & - (y_all_between_nm_wc<=(y_column_nc[i_c]+500))] + np.arange(len(y_all_between_nm_wc))[ + (y_all_between_nm_wc > y_column_nc[i_c]) & + (y_all_between_nm_wc <= (y_column_nc[i_c] + 500))] if len(index_lines_so_close_to_top_separator) > 0: - indexes_remained_after_deleting_closed_lines= \ + indexes_remained_after_deleting_closed_lines = \ np.array(list(set(list(range(len(y_all_between_nm_wc)))) - set(list(index_lines_so_close_to_top_separator)))) if len(indexes_remained_after_deleting_closed_lines) > 0: - y_all_between_nm_wc = y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - x_starting_all_between_nm_wc = x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - x_ending_all_between_nm_wc = x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - + y_all_between_nm_wc = y_all_between_nm_wc[ + indexes_remained_after_deleting_closed_lines] + x_starting_all_between_nm_wc = x_starting_all_between_nm_wc[ + indexes_remained_after_deleting_closed_lines] + x_ending_all_between_nm_wc = x_ending_all_between_nm_wc[ + indexes_remained_after_deleting_closed_lines] + y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c]) x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, i_s_nc) - x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, x_end_biggest_column) - + x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, + x_end_biggest_column) + if len(x_diff_all_between_nm_wc) > 0: try: y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c]) - x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, x_starting_all_between_nm_wc[biggest]) - x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, x_ending_all_between_nm_wc[biggest]) + x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, + x_starting_all_between_nm_wc[ + biggest]) + x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, + x_ending_all_between_nm_wc[biggest]) except: pass - - y_all_between_nm_wc = np.append(y_all_between_nm_wc, [y_column_nc[i_c]] * len(columns_not_covered)) - x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, columns_not_covered) - x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered) + 1) - - ind_args_between=np.arange(len(x_ending_all_between_nm_wc)) + + y_all_between_nm_wc = np.append(y_all_between_nm_wc, + [y_column_nc[i_c]] * len(columns_not_covered)) + x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, + columns_not_covered) + x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, + np.array(columns_not_covered) + 1) + + ind_args_between = np.arange(len(x_ending_all_between_nm_wc)) for column in range(i_s_nc, x_end_biggest_column): - ind_args_in_col=ind_args_between[x_starting_all_between_nm_wc==column] + ind_args_in_col = ind_args_between[x_starting_all_between_nm_wc == column] #print('babali2') #print(ind_args_in_col,'ind_args_in_col') - ind_args_in_col=np.array(ind_args_in_col) + ind_args_in_col = np.array(ind_args_in_col) #print(len(y_type_2)) - y_column=y_all_between_nm_wc[ind_args_in_col] - x_start_column=x_starting_all_between_nm_wc[ind_args_in_col] - x_end_column=x_ending_all_between_nm_wc[ind_args_in_col] + y_column = y_all_between_nm_wc[ind_args_in_col] + x_start_column = x_starting_all_between_nm_wc[ind_args_in_col] + x_end_column = x_ending_all_between_nm_wc[ind_args_in_col] #print('babali3') - ind_args_col_sorted=np.argsort(y_column) - y_col_sort=y_column[ind_args_col_sorted] - x_start_column_sort=x_start_column[ind_args_col_sorted] - x_end_column_sort=x_end_column[ind_args_col_sorted] + ind_args_col_sorted = np.argsort(y_column) + y_col_sort = y_column[ind_args_col_sorted] + x_start_column_sort = x_start_column[ind_args_col_sorted] + x_end_column_sort = x_end_column[ind_args_col_sorted] #print('babali4') for ii in range(len(y_col_sort)): #print('babali5') y_lines_by_order.append(y_col_sort[ii]) x_start_by_order.append(x_start_column_sort[ii]) - x_end_by_order.append(x_end_column_sort[ii]-1) + x_end_by_order.append(x_end_column_sort[ii] - 1) else: #print(column,'column') - ind_args_in_col=ind_args[x_starting==i_s_nc] + ind_args_in_col = ind_args[x_starting == i_s_nc] #print('babali2') #print(ind_args_in_col,'ind_args_in_col') - ind_args_in_col=np.array(ind_args_in_col) + ind_args_in_col = np.array(ind_args_in_col) #print(len(y_type_2)) - y_column=y_type_2[ind_args_in_col] - x_start_column=x_starting[ind_args_in_col] - x_end_column=x_ending[ind_args_in_col] + y_column = y_type_2[ind_args_in_col] + x_start_column = x_starting[ind_args_in_col] + x_end_column = x_ending[ind_args_in_col] #print('babali3') - ind_args_col_sorted=np.argsort(y_column) - y_col_sort=y_column[ind_args_col_sorted] - x_start_column_sort=x_start_column[ind_args_col_sorted] - x_end_column_sort=x_end_column[ind_args_col_sorted] + ind_args_col_sorted = np.argsort(y_column) + y_col_sort = y_column[ind_args_col_sorted] + x_start_column_sort = x_start_column[ind_args_col_sorted] + x_end_column_sort = x_end_column[ind_args_col_sorted] #print('babali4') for ii in range(len(y_col_sort)): y_lines_by_order.append(y_col_sort[ii]) x_start_by_order.append(x_start_column_sort[ii]) - x_end_by_order.append(x_end_column_sort[ii]-1) - + x_end_by_order.append(x_end_column_sort[ii] - 1) + for il in range(len(y_lines_by_order)): y_copy = list(y_lines_by_order) x_start_copy = list(x_start_by_order) x_end_copy = list(x_end_by_order) - + #print(y_copy,'y_copy') - y_itself=y_copy.pop(il) - x_start_itself=x_start_copy.pop(il) - x_end_itself=x_end_copy.pop(il) - + y_itself = y_copy.pop(il) + x_start_itself = x_start_copy.pop(il) + x_end_itself = x_end_copy.pop(il) + #print(y_copy,'y_copy2') - for column in range(x_start_itself, x_end_itself+1): + for column in range(x_start_itself, x_end_itself + 1): #print(column,'cols') - y_in_cols=[] + y_in_cols = [] for yic in range(len(y_copy)): #print('burda') - if (y_copy[yic]>y_itself and + if (y_copy[yic] > y_itself and x_start_copy[yic] <= column <= x_end_copy[yic]): y_in_cols.append(y_copy[yic]) #print('burda2') #print(y_in_cols,'y_in_cols') - if len(y_in_cols)>0: - y_down=np.min(y_in_cols) + if len(y_in_cols) > 0: + y_down = np.min(y_in_cols) else: - y_down=[int(splitter_y_new[i+1])][0] + y_down = [int(splitter_y_new[i + 1])][0] #print(y_itself,'y_itself') boxes.append([peaks_neg_tot[column], - peaks_neg_tot[column+1], + peaks_neg_tot[column + 1], y_itself, y_down]) except: - boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1], - int(splitter_y_new[i]), int(splitter_y_new[i+1])]) + boxes.append([0, peaks_neg_tot[len(peaks_neg_tot) - 1], + int(splitter_y_new[i]), int(splitter_y_new[i + 1])]) else: - y_lines_by_order=[] - x_start_by_order=[] - x_end_by_order=[] - if len(x_starting)>0: - all_columns = np.arange(len(peaks_neg_tot)-1) + y_lines_by_order = [] + x_start_by_order = [] + x_end_by_order = [] + if len(x_starting) > 0: + all_columns = np.arange(len(peaks_neg_tot) - 1) columns_covered_by_lines_covered_more_than_2col = [] for dj in range(len(x_starting)): - if set(list(range(x_starting[dj],x_ending[dj]))) == set(all_columns): + if set(list(range(x_starting[dj], x_ending[dj]))) == set(all_columns): pass else: columns_covered_by_lines_covered_more_than_2col = columns_covered_by_lines_covered_more_than_2col + \ - list(range(x_starting[dj],x_ending[dj])) - columns_covered_by_lines_covered_more_than_2col = list(set(columns_covered_by_lines_covered_more_than_2col)) + list(range(x_starting[dj], x_ending[dj])) + columns_covered_by_lines_covered_more_than_2col = list( + set(columns_covered_by_lines_covered_more_than_2col)) columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col)) y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1)) @@ -2093,82 +2116,82 @@ def return_boxes_of_images_by_order_of_reading_new( x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) if len(new_main_sep_y) > 0: x_starting = np.append(x_starting, 0) - x_ending = np.append(x_ending, len(peaks_neg_tot)-1) + x_ending = np.append(x_ending, len(peaks_neg_tot) - 1) else: x_starting = np.append(x_starting, x_starting[0]) x_ending = np.append(x_ending, x_ending[0]) else: - all_columns = np.arange(len(peaks_neg_tot)-1) + all_columns = np.arange(len(peaks_neg_tot) - 1) columns_not_covered = list(set(all_columns)) y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered)) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) x_starting = np.append(x_starting, columns_not_covered) x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) - - ind_args=np.array(range(len(y_type_2))) + + ind_args = np.array(range(len(y_type_2))) #ind_args=np.array(ind_args) - for column in range(len(peaks_neg_tot)-1): + for column in range(len(peaks_neg_tot) - 1): #print(column,'column') - ind_args_in_col=ind_args[x_starting==column] - ind_args_in_col=np.array(ind_args_in_col) + ind_args_in_col = ind_args[x_starting == column] + ind_args_in_col = np.array(ind_args_in_col) #print(len(y_type_2)) - y_column=y_type_2[ind_args_in_col] - x_start_column=x_starting[ind_args_in_col] - x_end_column=x_ending[ind_args_in_col] - - ind_args_col_sorted=np.argsort(y_column) - y_col_sort=y_column[ind_args_col_sorted] - x_start_column_sort=x_start_column[ind_args_col_sorted] - x_end_column_sort=x_end_column[ind_args_col_sorted] + y_column = y_type_2[ind_args_in_col] + x_start_column = x_starting[ind_args_in_col] + x_end_column = x_ending[ind_args_in_col] + + ind_args_col_sorted = np.argsort(y_column) + y_col_sort = y_column[ind_args_col_sorted] + x_start_column_sort = x_start_column[ind_args_col_sorted] + x_end_column_sort = x_end_column[ind_args_col_sorted] #print('babali4') for ii in range(len(y_col_sort)): #print('babali5') y_lines_by_order.append(y_col_sort[ii]) x_start_by_order.append(x_start_column_sort[ii]) - x_end_by_order.append(x_end_column_sort[ii]-1) - + x_end_by_order.append(x_end_column_sort[ii] - 1) + for il in range(len(y_lines_by_order)): y_copy = list(y_lines_by_order) x_start_copy = list(x_start_by_order) x_end_copy = list(x_end_by_order) - + #print(y_copy,'y_copy') - y_itself=y_copy.pop(il) - x_start_itself=x_start_copy.pop(il) - x_end_itself=x_end_copy.pop(il) - + y_itself = y_copy.pop(il) + x_start_itself = x_start_copy.pop(il) + x_end_itself = x_end_copy.pop(il) + #print(y_copy,'y_copy2') - for column in range(x_start_itself, x_end_itself+1): + for column in range(x_start_itself, x_end_itself + 1): #print(column,'cols') - y_in_cols=[] + y_in_cols = [] for yic in range(len(y_copy)): #print('burda') - if (y_copy[yic]>y_itself and + if (y_copy[yic] > y_itself and x_start_copy[yic] <= column <= x_end_copy[yic]): y_in_cols.append(y_copy[yic]) #print('burda2') #print(y_in_cols,'y_in_cols') - if len(y_in_cols)>0: - y_down=np.min(y_in_cols) + if len(y_in_cols) > 0: + y_down = np.min(y_in_cols) else: - y_down=[int(splitter_y_new[i+1])][0] + y_down = [int(splitter_y_new[i + 1])][0] #print(y_itself,'y_itself') boxes.append([peaks_neg_tot[column], - peaks_neg_tot[column+1], + peaks_neg_tot[column + 1], y_itself, y_down]) #else: - #boxes.append([ 0, regions_without_separators[:,:].shape[1] ,splitter_y_new[i],splitter_y_new[i+1]]) - - if right2left_readingorder: + #boxes.append([ 0, regions_without_separators[:,:].shape[1] ,splitter_y_new[i],splitter_y_new[i+1]]) + + if right2left_readingorder: peaks_neg_tot_tables_new = [] - if len(peaks_neg_tot_tables)>=1: + if len(peaks_neg_tot_tables) >= 1: for peaks_tab_ind in peaks_neg_tot_tables: peaks_neg_tot_tables_ind = regions_without_separators.shape[1] - np.array(peaks_tab_ind) peaks_neg_tot_tables_ind = list(peaks_neg_tot_tables_ind[::-1]) peaks_neg_tot_tables_new.append(peaks_neg_tot_tables_ind) - + for i in range(len(boxes)): x_start_new = regions_without_separators.shape[1] - boxes[i][1] x_end_new = regions_without_separators.shape[1] - boxes[i][0] diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index d2dcd5f..09738cd 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -43,14 +43,14 @@ def get_text_region_boxes_by_given_contours(contours): def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): found_polygons_early = [] - for jv,c in enumerate(contours): + for jv, c in enumerate(contours): if len(c) < 3: # A polygon cannot have less than 3 points continue polygon = geometry.Polygon([point[0] for point in c]) area = polygon.area if (min_area * np.prod(image.shape[:2]) <= area <= max_area * np.prod(image.shape[:2]) and - hierarchy[0][jv][3] == -1): + hierarchy[0][jv][3] == -1): found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.uint)) return found_polygons_early @@ -58,7 +58,7 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): found_polygons_early = [] - for jv,c in enumerate(contours): + for jv, c in enumerate(contours): if len(c) < 3: # A polygon cannot have less than 3 points continue @@ -69,8 +69,8 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m # Check that polygon has area greater than minimal area # print(hierarchy[0][jv][3],hierarchy ) if (min_area * np.prod(image.shape[:2]) <= area <= max_area * np.prod(image.shape[:2]) and - # hierarchy[0][jv][3]==-1 - True): + # hierarchy[0][jv][3]==-1 + True): # print(c[0][0][1]) found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32)) @@ -122,15 +122,15 @@ def find_new_features_of_contours(contours_main): def find_features_of_contours(contours_main): - areas_main=np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) - M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))] - cx_main=[(M_main[j]['m10']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))] - cy_main=[(M_main[j]['m01']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))] - x_min_main=np.array([np.min(contours_main[j][:,0,0]) for j in range(len(contours_main))]) - x_max_main=np.array([np.max(contours_main[j][:,0,0]) for j in range(len(contours_main))]) + areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) + M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))] + cx_main = [(M_main[j]['m10'] / (M_main[j]['m00'] + 1e-32)) for j in range(len(M_main))] + cy_main = [(M_main[j]['m01'] / (M_main[j]['m00'] + 1e-32)) for j in range(len(M_main))] + x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) + x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) - y_min_main=np.array([np.min(contours_main[j][:,0,1]) for j in range(len(contours_main))]) - y_max_main=np.array([np.max(contours_main[j][:,0,1]) for j in range(len(contours_main))]) + y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) + y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) return y_min_main, y_max_main @@ -257,17 +257,17 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first def get_textregion_contours_in_org_image_light(cnts, img, slope_first, map=map): if not len(cnts): return [] - img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) + img = cv2.resize(img, (int(img.shape[1] / 6), int(img.shape[0] / 6)), interpolation=cv2.INTER_NEAREST) ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) #cnts = cnts/2 - cnts = [(i/6).astype(np.int) for i in cnts] + cnts = [(i / 6).astype(np.int) for i in cnts] results = map(partial(do_back_rotation_and_get_cnt_back, img=img, slope_first=slope_first, ), cnts, range(len(cnts))) contours, indexes = tuple(zip(*results)) - return [i*6 for i in contours] + return [i * 6 for i in contours] def return_contours_of_interested_textline(region_pre_p, pixel): @@ -339,4 +339,3 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, img_ret = cv2.fillPoly(img_ret, pts=contours_imgs, color=(1, 1, 1)) return img_ret[:, :, 0] - diff --git a/src/eynollah/utils/drop_capitals.py b/src/eynollah/utils/drop_capitals.py index 52a4e7d..0e84a05 100644 --- a/src/eynollah/utils/drop_capitals.py +++ b/src/eynollah/utils/drop_capitals.py @@ -9,17 +9,17 @@ from .contour import ( def adhere_drop_capital_region_into_corresponding_textline( - text_regions_p, - polygons_of_drop_capitals, - contours_only_text_parent, - contours_only_text_parent_h, - all_box_coord, - all_box_coord_h, - all_found_textline_polygons, - all_found_textline_polygons_h, - kernel=None, - curved_line=False, - textline_light=False, + text_regions_p, + polygons_of_drop_capitals, + contours_only_text_parent, + contours_only_text_parent_h, + all_box_coord, + all_box_coord_h, + all_found_textline_polygons, + all_found_textline_polygons_h, + kernel=None, + curved_line=False, + textline_light=False, ): # print(np.shape(all_found_textline_polygons),np.shape(all_found_textline_polygons[3]),'all_found_textline_polygonsshape') # print(all_found_textline_polygons[3]) @@ -29,7 +29,8 @@ def adhere_drop_capital_region_into_corresponding_textline( img_con_all = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3)) for j_cont in range(len(contours_only_text_parent)): - img_con_all[all_box_coord[j_cont][0] : all_box_coord[j_cont][1], all_box_coord[j_cont][2] : all_box_coord[j_cont][3], 0] = (j_cont + 1) * 3 + img_con_all[all_box_coord[j_cont][0]: all_box_coord[j_cont][1], + all_box_coord[j_cont][2]: all_box_coord[j_cont][3], 0] = (j_cont + 1) * 3 # img_con_all=cv2.fillPoly(img_con_all,pts=[contours_only_text_parent[j_cont]],color=((j_cont+1)*3,(j_cont+1)*3,(j_cont+1)*3)) # plt.imshow(img_con_all[:,:,0]) @@ -85,14 +86,16 @@ def adhere_drop_capital_region_into_corresponding_textline( sum_pixels_of_intersection = [] for i in range(len(region_with_intersected_drop)): # print((region_with_intersected_drop[i]*3+1)) - sum_pixels_of_intersection.append(((img_con_all_copy[:, :, 0] == (region_with_intersected_drop[i] * 3 + 1)) * 1).sum()) + sum_pixels_of_intersection.append( + ((img_con_all_copy[:, :, 0] == (region_with_intersected_drop[i] * 3 + 1)) * 1).sum()) # print(sum_pixels_of_intersection) region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1 # print(region_final,'region_final') # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) try: - cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours( + all_found_textline_polygons[int(region_final)]) # print(all_box_coord[j_cont]) # print(cx_t) # print(cy_t) @@ -109,26 +112,30 @@ def adhere_drop_capital_region_into_corresponding_textline( # print(arg_min) cnt_nearest = np.copy(all_found_textline_polygons[int(region_final)][arg_min]) - cnt_nearest[:, 0, 0] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, 0] # +all_box_coord[int(region_final)][2] - cnt_nearest[:, 0, 1] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, 1] # +all_box_coord[int(region_final)][0] + cnt_nearest[:, 0, 0] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, + 0] # +all_box_coord[int(region_final)][2] + cnt_nearest[:, 0, 1] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, + 1] # +all_box_coord[int(region_final)][0] img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3)) img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255)) - img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255)) + img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], + color=(255, 255, 255)) img_textlines = img_textlines.astype(np.uint8) - + contours_combined = return_contours_of_interested_region(img_textlines, 255, 0) - + #plt.imshow(img_textlines) #plt.show() - + #imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) #ret, thresh = cv2.threshold(imgray, 0, 255, 0) #contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) + areas_cnt_text = np.array( + [cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) contours_biggest = contours_combined[np.argmax(areas_cnt_text)] @@ -138,11 +145,12 @@ def adhere_drop_capital_region_into_corresponding_textline( # contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0] # contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2]) - - if len(contours_combined)==1: + + if len(contours_combined) == 1: all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest - elif len(contours_combined)==2: - all_found_textline_polygons[int(region_final)].insert(arg_min, polygons_of_drop_capitals[i_drop] ) + elif len(contours_combined) == 2: + all_found_textline_polygons[int(region_final)].insert(arg_min, + polygons_of_drop_capitals[i_drop]) else: pass @@ -156,7 +164,8 @@ def adhere_drop_capital_region_into_corresponding_textline( # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) try: - cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours( + all_found_textline_polygons[int(region_final)]) # print(all_box_coord[j_cont]) # print(cx_t) # print(cy_t) @@ -171,23 +180,26 @@ def adhere_drop_capital_region_into_corresponding_textline( # print(arg_min) cnt_nearest = np.copy(all_found_textline_polygons[int(region_final)][arg_min]) - cnt_nearest[:, 0, 0] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, 0] # +all_box_coord[int(region_final)][2] - cnt_nearest[:, 0, 1] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, 1] # +all_box_coord[int(region_final)][0] + cnt_nearest[:, 0, 0] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, + 0] # +all_box_coord[int(region_final)][2] + cnt_nearest[:, 0, 1] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, + 1] # +all_box_coord[int(region_final)][0] img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3)) img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255)) - img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255)) + img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], + color=(255, 255, 255)) img_textlines = img_textlines.astype(np.uint8) - contours_combined = return_contours_of_interested_region(img_textlines, 255, 0) ##imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) ##ret, thresh = cv2.threshold(imgray, 0, 255, 0) ##contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) + areas_cnt_text = np.array( + [cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) contours_biggest = contours_combined[np.argmax(areas_cnt_text)] @@ -198,10 +210,11 @@ def adhere_drop_capital_region_into_corresponding_textline( # print(np.shape(contours_biggest),'contours_biggest') # print(np.shape(all_found_textline_polygons[int(region_final)][arg_min])) ##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2]) - if len(contours_combined)==1: + if len(contours_combined) == 1: all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest - elif len(contours_combined)==2: - all_found_textline_polygons[int(region_final)].insert(arg_min, polygons_of_drop_capitals[i_drop] ) + elif len(contours_combined) == 2: + all_found_textline_polygons[int(region_final)].insert(arg_min, + polygons_of_drop_capitals[i_drop]) else: pass except: @@ -209,7 +222,8 @@ def adhere_drop_capital_region_into_corresponding_textline( try: # print(all_found_textline_polygons[j_cont][0]) - cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours( + all_found_textline_polygons[int(region_final)]) # print(all_box_coord[j_cont]) # print(cx_t) # print(cy_t) @@ -224,12 +238,15 @@ def adhere_drop_capital_region_into_corresponding_textline( # print(arg_min) cnt_nearest = np.copy(all_found_textline_polygons[int(region_final)][arg_min]) - cnt_nearest[:, 0, 0] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, 0] # +all_box_coord[int(region_final)][2] - cnt_nearest[:, 0, 1] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, 1] # +all_box_coord[int(region_final)][0] + cnt_nearest[:, 0, 0] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, + 0] # +all_box_coord[int(region_final)][2] + cnt_nearest[:, 0, 1] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, + 1] # +all_box_coord[int(region_final)][0] img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3)) img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255)) - img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255)) + img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], + color=(255, 255, 255)) img_textlines = img_textlines.astype(np.uint8) contours_combined = return_contours_of_interested_region(img_textlines, 255, 0) @@ -239,7 +256,8 @@ def adhere_drop_capital_region_into_corresponding_textline( #contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # print(len(contours_combined),'len textlines mixed') - areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) + areas_cnt_text = np.array( + [cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) contours_biggest = contours_combined[np.argmax(areas_cnt_text)] @@ -249,10 +267,11 @@ def adhere_drop_capital_region_into_corresponding_textline( contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] # -all_box_coord[int(region_final)][0] ##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2]) - if len(contours_combined)==1: + if len(contours_combined) == 1: all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest - elif len(contours_combined)==2: - all_found_textline_polygons[int(region_final)].insert(arg_min, polygons_of_drop_capitals[i_drop] ) + elif len(contours_combined) == 2: + all_found_textline_polygons[int(region_final)].insert(arg_min, + polygons_of_drop_capitals[i_drop]) else: pass # all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest @@ -311,14 +330,16 @@ def adhere_drop_capital_region_into_corresponding_textline( sum_pixels_of_intersection = [] for i in range(len(region_with_intersected_drop)): # print((region_with_intersected_drop[i]*3+1)) - sum_pixels_of_intersection.append(((img_con_all_copy[:, :, 0] == (region_with_intersected_drop[i] * 3 + 1)) * 1).sum()) + sum_pixels_of_intersection.append( + ((img_con_all_copy[:, :, 0] == (region_with_intersected_drop[i] * 3 + 1)) * 1).sum()) # print(sum_pixels_of_intersection) region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1 # print(region_final,'region_final') # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) try: - cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours( + all_found_textline_polygons[int(region_final)]) # print(all_box_coord[j_cont]) # print(cx_t) # print(cy_t) @@ -335,23 +356,27 @@ def adhere_drop_capital_region_into_corresponding_textline( # print(arg_min) cnt_nearest = np.copy(all_found_textline_polygons[int(region_final)][arg_min]) - cnt_nearest[:, 0] = all_found_textline_polygons[int(region_final)][arg_min][:, 0] + all_box_coord[int(region_final)][2] - cnt_nearest[:, 1] = all_found_textline_polygons[int(region_final)][arg_min][:, 1] + all_box_coord[int(region_final)][0] + cnt_nearest[:, 0] = all_found_textline_polygons[int(region_final)][arg_min][:, 0] + \ + all_box_coord[int(region_final)][2] + cnt_nearest[:, 1] = all_found_textline_polygons[int(region_final)][arg_min][:, 1] + \ + all_box_coord[int(region_final)][0] img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3)) img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255)) - img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255)) + img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], + color=(255, 255, 255)) img_textlines = img_textlines.astype(np.uint8) contours_combined = return_contours_of_interested_region(img_textlines, 255, 0) - + #imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) #ret, thresh = cv2.threshold(imgray, 0, 255, 0) #contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # print(len(contours_combined),'len textlines mixed') - areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) + areas_cnt_text = np.array( + [cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) contours_biggest = contours_combined[np.argmax(areas_cnt_text)] @@ -360,11 +385,13 @@ def adhere_drop_capital_region_into_corresponding_textline( contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0] - all_box_coord[int(region_final)][2] contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] - all_box_coord[int(region_final)][0] - contours_biggest = contours_biggest.reshape(np.shape(contours_biggest)[0], np.shape(contours_biggest)[2]) - if len(contours_combined)==1: + contours_biggest = contours_biggest.reshape(np.shape(contours_biggest)[0], + np.shape(contours_biggest)[2]) + if len(contours_combined) == 1: all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest - elif len(contours_combined)==2: - all_found_textline_polygons[int(region_final)].insert(arg_min, polygons_of_drop_capitals[i_drop] ) + elif len(contours_combined) == 2: + all_found_textline_polygons[int(region_final)].insert(arg_min, + polygons_of_drop_capitals[i_drop]) else: pass @@ -381,7 +408,8 @@ def adhere_drop_capital_region_into_corresponding_textline( # print(cx_t,'print') try: # print(all_found_textline_polygons[j_cont][0]) - cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours( + all_found_textline_polygons[int(region_final)]) # print(all_box_coord[j_cont]) # print(cx_t) # print(cy_t) @@ -396,23 +424,27 @@ def adhere_drop_capital_region_into_corresponding_textline( # print(arg_min) cnt_nearest = np.copy(all_found_textline_polygons[int(region_final)][arg_min]) - cnt_nearest[:, 0] = all_found_textline_polygons[int(region_final)][arg_min][:, 0] + all_box_coord[int(region_final)][2] - cnt_nearest[:, 1] = all_found_textline_polygons[int(region_final)][arg_min][:, 1] + all_box_coord[int(region_final)][0] + cnt_nearest[:, 0] = all_found_textline_polygons[int(region_final)][arg_min][:, 0] + \ + all_box_coord[int(region_final)][2] + cnt_nearest[:, 1] = all_found_textline_polygons[int(region_final)][arg_min][:, 1] + \ + all_box_coord[int(region_final)][0] img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3)) img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255)) - img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255)) + img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], + color=(255, 255, 255)) img_textlines = img_textlines.astype(np.uint8) contours_combined = return_contours_of_interested_region(img_textlines, 255, 0) - + #imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) #ret, thresh = cv2.threshold(imgray, 0, 255, 0) #contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # print(len(contours_combined),'len textlines mixed') - areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) + areas_cnt_text = np.array( + [cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) contours_biggest = contours_combined[np.argmax(areas_cnt_text)] @@ -421,11 +453,13 @@ def adhere_drop_capital_region_into_corresponding_textline( contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0] - all_box_coord[int(region_final)][2] contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] - all_box_coord[int(region_final)][0] - contours_biggest = contours_biggest.reshape(np.shape(contours_biggest)[0], np.shape(contours_biggest)[2]) - if len(contours_combined)==1: + contours_biggest = contours_biggest.reshape(np.shape(contours_biggest)[0], + np.shape(contours_biggest)[2]) + if len(contours_combined) == 1: all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest - elif len(contours_combined)==2: - all_found_textline_polygons[int(region_final)].insert(arg_min, polygons_of_drop_capitals[i_drop] ) + elif len(contours_combined) == 2: + all_found_textline_polygons[int(region_final)].insert(arg_min, + polygons_of_drop_capitals[i_drop]) else: pass # all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest @@ -502,7 +536,6 @@ def adhere_drop_capital_region_into_corresponding_textline( def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1): - drop_only = (layout_no_patch[:, :, 0] == 4) * 1 contours_drop, hir_on_drop = return_contours_of_image(drop_only) contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop) @@ -510,7 +543,8 @@ def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1): areas_cnt_text = np.array([cv2.contourArea(contours_drop_parent[j]) for j in range(len(contours_drop_parent))]) areas_cnt_text = areas_cnt_text / float(drop_only.shape[0] * drop_only.shape[1]) - contours_drop_parent = [contours_drop_parent[jz] for jz in range(len(contours_drop_parent)) if areas_cnt_text[jz] > 0.001] + contours_drop_parent = [contours_drop_parent[jz] for jz in range(len(contours_drop_parent)) if + areas_cnt_text[jz] > 0.001] areas_cnt_text = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > 0.001] @@ -520,15 +554,17 @@ def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1): x, y, w, h = cv2.boundingRect(contours_drop_parent[jj]) # boxes.append([int(x), int(y), int(w), int(h)]) - iou_of_box_and_contoure = float(drop_only.shape[0] * drop_only.shape[1]) * areas_cnt_text[jj] / float(w * h) * 100 + iou_of_box_and_contoure = float(drop_only.shape[0] * drop_only.shape[1]) * areas_cnt_text[jj] / float( + w * h) * 100 height_to_weight_ratio = h / float(w) weigh_to_height_ratio = w / float(h) if iou_of_box_and_contoure > 60 and weigh_to_height_ratio < 1.2 and height_to_weight_ratio < 2: map_of_drop_contour_bb = np.zeros((layout1.shape[0], layout1.shape[1])) - map_of_drop_contour_bb[y : y + h, x : x + w] = layout1[y : y + h, x : x + w] + map_of_drop_contour_bb[y: y + h, x: x + w] = layout1[y: y + h, x: x + w] - if (((map_of_drop_contour_bb == 1) * 1).sum() / float(((map_of_drop_contour_bb == 5) * 1).sum()) * 100) >= 15: + if (((map_of_drop_contour_bb == 1) * 1).sum() / float( + ((map_of_drop_contour_bb == 5) * 1).sum()) * 100) >= 15: contours_drop_parent_final.append(contours_drop_parent[jj]) layout_no_patch[:, :, 0][layout_no_patch[:, :, 0] == 4] = 0 @@ -536,4 +572,3 @@ def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1): layout_no_patch = cv2.fillPoly(layout_no_patch, pts=contours_drop_parent_final, color=(4, 4, 4)) return layout_no_patch - diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py index 96c512c..c858fc0 100644 --- a/src/eynollah/utils/marginals.py +++ b/src/eynollah/utils/marginals.py @@ -8,206 +8,196 @@ from .rotate import rotate_image def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_version=False, kernel=None): - mask_marginals=np.zeros((text_with_lines.shape[0],text_with_lines.shape[1])) - mask_marginals=mask_marginals.astype(np.uint8) + mask_marginals = np.zeros((text_with_lines.shape[0], text_with_lines.shape[1])) + mask_marginals = mask_marginals.astype(np.uint8) - - text_with_lines=text_with_lines.astype(np.uint8) + text_with_lines = text_with_lines.astype(np.uint8) ##text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3) - text_with_lines_eroded=cv2.erode(text_with_lines,kernel,iterations=5) + text_with_lines_eroded = cv2.erode(text_with_lines, kernel, iterations=5) - if text_with_lines.shape[0]<=1500: + if text_with_lines.shape[0] <= 1500: pass elif 1500 < text_with_lines.shape[0] <= 1800: - text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.5),text_with_lines.shape[1]) - text_with_lines=cv2.erode(text_with_lines,kernel,iterations=5) - text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1]) + text_with_lines = resize_image(text_with_lines, int(text_with_lines.shape[0] * 1.5), text_with_lines.shape[1]) + text_with_lines = cv2.erode(text_with_lines, kernel, iterations=5) + text_with_lines = resize_image(text_with_lines, text_with_lines_eroded.shape[0], + text_with_lines_eroded.shape[1]) else: - text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.8),text_with_lines.shape[1]) - text_with_lines=cv2.erode(text_with_lines,kernel,iterations=7) - text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1]) - + text_with_lines = resize_image(text_with_lines, int(text_with_lines.shape[0] * 1.8), text_with_lines.shape[1]) + text_with_lines = cv2.erode(text_with_lines, kernel, iterations=7) + text_with_lines = resize_image(text_with_lines, text_with_lines_eroded.shape[0], + text_with_lines_eroded.shape[1]) - text_with_lines_y=text_with_lines.sum(axis=0) - text_with_lines_y_eroded=text_with_lines_eroded.sum(axis=0) + text_with_lines_y = text_with_lines.sum(axis=0) + text_with_lines_y_eroded = text_with_lines_eroded.sum(axis=0) - thickness_along_y_percent=text_with_lines_y_eroded.max()/(float(text_with_lines.shape[0]))*100 + thickness_along_y_percent = text_with_lines_y_eroded.max() / (float(text_with_lines.shape[0])) * 100 #print(thickness_along_y_percent,'thickness_along_y_percent') - if thickness_along_y_percent<30: - min_textline_thickness=8 + if thickness_along_y_percent < 30: + min_textline_thickness = 8 elif 30 <= thickness_along_y_percent < 50: - min_textline_thickness=20 + min_textline_thickness = 20 else: - min_textline_thickness=40 - - - - if thickness_along_y_percent>=14: - - text_with_lines_y_rev=-1*text_with_lines_y[:] + min_textline_thickness = 40 - text_with_lines_y_rev=text_with_lines_y_rev-np.min(text_with_lines_y_rev) + if thickness_along_y_percent >= 14: - sigma_gaus=1 - region_sum_0= gaussian_filter1d(text_with_lines_y, sigma_gaus) + text_with_lines_y_rev = -1 * text_with_lines_y[:] - region_sum_0_rev=gaussian_filter1d(text_with_lines_y_rev, sigma_gaus) + text_with_lines_y_rev = text_with_lines_y_rev - np.min(text_with_lines_y_rev) - region_sum_0_updown=region_sum_0[len(region_sum_0)::-1] + sigma_gaus = 1 + region_sum_0 = gaussian_filter1d(text_with_lines_y, sigma_gaus) - first_nonzero=(next((i for i, x in enumerate(region_sum_0) if x), None)) - last_nonzero=(next((i for i, x in enumerate(region_sum_0_updown) if x), None)) + region_sum_0_rev = gaussian_filter1d(text_with_lines_y_rev, sigma_gaus) + region_sum_0_updown = region_sum_0[len(region_sum_0)::-1] - last_nonzero=len(region_sum_0)-last_nonzero + first_nonzero = (next((i for i, x in enumerate(region_sum_0) if x), None)) + last_nonzero = (next((i for i, x in enumerate(region_sum_0_updown) if x), None)) - mid_point=(last_nonzero+first_nonzero)/2. + last_nonzero = len(region_sum_0) - last_nonzero + mid_point = (last_nonzero + first_nonzero) / 2. - one_third_right=(last_nonzero-mid_point)/3.0 - one_third_left=(mid_point-first_nonzero)/3.0 + one_third_right = (last_nonzero - mid_point) / 3.0 + one_third_left = (mid_point - first_nonzero) / 3.0 peaks, _ = find_peaks(text_with_lines_y_rev, height=0) - peaks=np.array(peaks) - peaks=peaks[(peaks>first_nonzero) & (peaks < last_nonzero)] - peaks=peaks[region_sum_0[peaks]mid_point] - peaks_left=peaks[peaks(mid_point+one_third_right)] - peaks_left=peaks[peaks<(mid_point-one_third_left)] + peaks = np.array(peaks) + peaks = peaks[(peaks > first_nonzero) & (peaks < last_nonzero)] + peaks = peaks[region_sum_0[peaks] < min_textline_thickness] + if num_col == 1: + peaks_right = peaks[peaks > mid_point] + peaks_left = peaks[peaks < mid_point] + if num_col == 2: + peaks_right = peaks[peaks > (mid_point + one_third_right)] + peaks_left = peaks[peaks < (mid_point - one_third_left)] try: - point_right=np.min(peaks_right) + point_right = np.min(peaks_right) except: - point_right=last_nonzero - + point_right = last_nonzero try: - point_left=np.max(peaks_left) + point_left = np.max(peaks_left) except: - point_left=first_nonzero - - + point_left = first_nonzero - if point_right>=mask_marginals.shape[1]: - point_right=mask_marginals.shape[1]-1 + if point_right >= mask_marginals.shape[1]: + point_right = mask_marginals.shape[1] - 1 try: - mask_marginals[:,point_left:point_right]=1 + mask_marginals[:, point_left:point_right] = 1 except: - mask_marginals[:,:]=1 + mask_marginals[:, :] = 1 - mask_marginals_rotated=rotate_image(mask_marginals,-slope_deskew) + mask_marginals_rotated = rotate_image(mask_marginals, -slope_deskew) - mask_marginals_rotated_sum=mask_marginals_rotated.sum(axis=0) + mask_marginals_rotated_sum = mask_marginals_rotated.sum(axis=0) - mask_marginals_rotated_sum[mask_marginals_rotated_sum!=0]=1 - index_x=np.array(range(len(mask_marginals_rotated_sum)))+1 + mask_marginals_rotated_sum[mask_marginals_rotated_sum != 0] = 1 + index_x = np.array(range(len(mask_marginals_rotated_sum))) + 1 - index_x_interest=index_x[mask_marginals_rotated_sum==1] + index_x_interest = index_x[mask_marginals_rotated_sum == 1] - min_point_of_left_marginal=np.min(index_x_interest)-16 - max_point_of_right_marginal=np.max(index_x_interest)+16 + min_point_of_left_marginal = np.min(index_x_interest) - 16 + max_point_of_right_marginal = np.max(index_x_interest) + 16 - if min_point_of_left_marginal<0: - min_point_of_left_marginal=0 - if max_point_of_right_marginal>=text_regions.shape[1]: - max_point_of_right_marginal=text_regions.shape[1]-1 + if min_point_of_left_marginal < 0: + min_point_of_left_marginal = 0 + if max_point_of_right_marginal >= text_regions.shape[1]: + max_point_of_right_marginal = text_regions.shape[1] - 1 if light_version: text_regions_org = np.copy(text_regions) - text_regions[text_regions[:,:]==1]=4 - - pixel_img=4 - min_area_text=0.00001 - - polygon_mask_marginals_rotated = return_contours_of_interested_region(mask_marginals,1,min_area_text) - + text_regions[text_regions[:, :] == 1] = 4 + + pixel_img = 4 + min_area_text = 0.00001 + + polygon_mask_marginals_rotated = return_contours_of_interested_region(mask_marginals, 1, min_area_text) + polygon_mask_marginals_rotated = polygon_mask_marginals_rotated[0] - polygons_of_marginals=return_contours_of_interested_region(text_regions,pixel_img,min_area_text) + polygons_of_marginals = return_contours_of_interested_region(text_regions, pixel_img, min_area_text) - cx_text_only,cy_text_only ,x_min_text_only,x_max_text_only, y_min_text_only ,y_max_text_only,y_cor_x_min_main=find_new_features_of_contours(polygons_of_marginals) + cx_text_only, cy_text_only, x_min_text_only, x_max_text_only, y_min_text_only, y_max_text_only, y_cor_x_min_main = find_new_features_of_contours( + polygons_of_marginals) - text_regions[(text_regions[:,:]==4)]=1 + text_regions[(text_regions[:, :] == 4)] = 1 - marginlas_should_be_main_text=[] + marginlas_should_be_main_text = [] - x_min_marginals_left=[] - x_min_marginals_right=[] + x_min_marginals_left = [] + x_min_marginals_right = [] for i in range(len(cx_text_only)): - results = cv2.pointPolygonTest(polygon_mask_marginals_rotated, (cx_text_only[i], cy_text_only[i]), False) + results = cv2.pointPolygonTest(polygon_mask_marginals_rotated, (cx_text_only[i], cy_text_only[i]), + False) if results == -1: marginlas_should_be_main_text.append(polygons_of_marginals[i]) - - - text_regions_org=cv2.fillPoly(text_regions_org, pts =marginlas_should_be_main_text, color=(4,4)) + text_regions_org = cv2.fillPoly(text_regions_org, pts=marginlas_should_be_main_text, color=(4, 4)) text_regions = np.copy(text_regions_org) - else: - - text_regions[(mask_marginals_rotated[:,:]!=1) & (text_regions[:,:]==1)]=4 - pixel_img=4 - min_area_text=0.00001 + text_regions[(mask_marginals_rotated[:, :] != 1) & (text_regions[:, :] == 1)] = 4 - polygons_of_marginals=return_contours_of_interested_region(text_regions,pixel_img,min_area_text) + pixel_img = 4 + min_area_text = 0.00001 - cx_text_only,cy_text_only ,x_min_text_only,x_max_text_only, y_min_text_only ,y_max_text_only,y_cor_x_min_main=find_new_features_of_contours(polygons_of_marginals) + polygons_of_marginals = return_contours_of_interested_region(text_regions, pixel_img, min_area_text) - text_regions[(text_regions[:,:]==4)]=1 + cx_text_only, cy_text_only, x_min_text_only, x_max_text_only, y_min_text_only, y_max_text_only, y_cor_x_min_main = find_new_features_of_contours( + polygons_of_marginals) - marginlas_should_be_main_text=[] + text_regions[(text_regions[:, :] == 4)] = 1 - x_min_marginals_left=[] - x_min_marginals_right=[] + marginlas_should_be_main_text = [] + + x_min_marginals_left = [] + x_min_marginals_right = [] for i in range(len(cx_text_only)): - x_width_mar=abs(x_min_text_only[i]-x_max_text_only[i]) - y_height_mar=abs(y_min_text_only[i]-y_max_text_only[i]) + x_width_mar = abs(x_min_text_only[i] - x_max_text_only[i]) + y_height_mar = abs(y_min_text_only[i] - y_max_text_only[i]) - if x_width_mar>16 and y_height_mar/x_width_mar<18: + if x_width_mar > 16 and y_height_mar / x_width_mar < 18: marginlas_should_be_main_text.append(polygons_of_marginals[i]) - if x_min_text_only[i]<(mid_point-one_third_left): - x_min_marginals_left_new=x_min_text_only[i] - if len(x_min_marginals_left)==0: + if x_min_text_only[i] < (mid_point - one_third_left): + x_min_marginals_left_new = x_min_text_only[i] + if len(x_min_marginals_left) == 0: x_min_marginals_left.append(x_min_marginals_left_new) else: - x_min_marginals_left[0]=min(x_min_marginals_left[0],x_min_marginals_left_new) + x_min_marginals_left[0] = min(x_min_marginals_left[0], x_min_marginals_left_new) else: - x_min_marginals_right_new=x_min_text_only[i] - if len(x_min_marginals_right)==0: + x_min_marginals_right_new = x_min_text_only[i] + if len(x_min_marginals_right) == 0: x_min_marginals_right.append(x_min_marginals_right_new) else: - x_min_marginals_right[0]=min(x_min_marginals_right[0],x_min_marginals_right_new) - - if len(x_min_marginals_left)==0: - x_min_marginals_left=[0] - if len(x_min_marginals_right)==0: - x_min_marginals_right=[text_regions.shape[1]-1] - + x_min_marginals_right[0] = min(x_min_marginals_right[0], x_min_marginals_right_new) - text_regions=cv2.fillPoly(text_regions, pts =marginlas_should_be_main_text, color=(4,4)) + if len(x_min_marginals_left) == 0: + x_min_marginals_left = [0] + if len(x_min_marginals_right) == 0: + x_min_marginals_right = [text_regions.shape[1] - 1] + text_regions = cv2.fillPoly(text_regions, pts=marginlas_should_be_main_text, color=(4, 4)) #text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0 #text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0 - - - text_regions[:,:int(min_point_of_left_marginal)][text_regions[:,:int(min_point_of_left_marginal)]==1]=0 - text_regions[:,int(max_point_of_right_marginal):][text_regions[:,int(max_point_of_right_marginal):]==1]=0 + + text_regions[:, :int(min_point_of_left_marginal)][ + text_regions[:, :int(min_point_of_left_marginal)] == 1] = 0 + text_regions[:, int(max_point_of_right_marginal):][ + text_regions[:, int(max_point_of_right_marginal):] == 1] = 0 ###text_regions[:,0:point_left][text_regions[:,0:point_left]==1]=4 @@ -216,7 +206,6 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve #plt.plot(peaks,region_sum_0[peaks],'*') #plt.show() - #plt.imshow(text_regions) #plt.show() diff --git a/src/eynollah/utils/pil_cv2.py b/src/eynollah/utils/pil_cv2.py index cc128e4..93d6de5 100644 --- a/src/eynollah/utils/pil_cv2.py +++ b/src/eynollah/utils/pil_cv2.py @@ -3,6 +3,7 @@ import numpy as np from ocrd_models import OcrdExif from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, COLOR_BGR2RGB, cvtColor, imread + # from sbb_binarization @@ -12,7 +13,7 @@ def cv2pil(img): def pil2cv(img): # from ocrd/workspace.py - color_conversion = COLOR_GRAY2BGR if img.mode in ('1', 'L') else COLOR_RGB2BGR + color_conversion = COLOR_GRAY2BGR if img.mode in ('1', 'L') else COLOR_RGB2BGR pil_as_np_array = np.array(img).astype('uint8') if img.mode == '1' else np.array(img) return cvtColor(pil_as_np_array, color_conversion) diff --git a/src/eynollah/utils/rotate.py b/src/eynollah/utils/rotate.py index 731814f..81df6c9 100644 --- a/src/eynollah/utils/rotate.py +++ b/src/eynollah/utils/rotate.py @@ -49,7 +49,7 @@ def rotate_image(img_patch, slope): return cv2.warpAffine(img_patch, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE) -def rotate_image_different( img, slope): +def rotate_image_different(img, slope): # img = cv2.imread('images/input.jpg') num_rows, num_cols = img.shape[:2] @@ -65,7 +65,8 @@ def rotate_max_area(image, rotated, rotated_textline, rotated_layout, rotated_ta y2 = y1 + int(hr) x1 = w // 2 - int(wr / 2) x2 = x1 + int(wr) - return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, x1:x2], rotated_table_prediction[y1:y2, x1:x2] + return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, + x1:x2], rotated_table_prediction[y1:y2, x1:x2] def rotation_not_90_func(img, textline, text_regions_p_1, table_prediction, thetha): @@ -91,5 +92,5 @@ def rotate_max_area_full_layout(image, rotated, rotated_textline, rotated_layout y2 = y1 + int(hr) x1 = w // 2 - int(wr / 2) x2 = x1 + int(wr) - return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, x1:x2], rotated_layout_full[y1:y2, x1:x2] - + return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, x1:x2], rotated_layout_full[ + y1:y2, x1:x2] diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 5057c34..3694ccf 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -47,7 +47,7 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): y = textline_patch_sum_along_width[:] # [first_nonzero:last_nonzero] y_padded = np.zeros(len(y) + 40) - y_padded[20 : len(y) + 20] = y + y_padded[20: len(y) + 20] = y x = np.array(range(len(y))) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) @@ -56,14 +56,15 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): y_padded_smoothed_e = gaussian_filter1d(y_padded, 2) y_padded_up_to_down_e = -y_padded + np.max(y_padded) y_padded_up_to_down_padded_e = np.zeros(len(y_padded_up_to_down_e) + 40) - y_padded_up_to_down_padded_e[20 : len(y_padded_up_to_down_e) + 20] = y_padded_up_to_down_e + y_padded_up_to_down_padded_e[20: len(y_padded_up_to_down_e) + 20] = y_padded_up_to_down_e y_padded_up_to_down_padded_e = gaussian_filter1d(y_padded_up_to_down_padded_e, 2) peaks_e, _ = find_peaks(y_padded_smoothed_e, height=0) peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[ + y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -74,11 +75,11 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): clusters_to_be_deleted = [] if len(arg_diff_cluster) > 0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[0: arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1: arg_diff_cluster[i + 1] + 1]) - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1:]) if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): @@ -115,7 +116,7 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): y_padded_smoothed = gaussian_filter1d(y_padded, sigma_gaus) y_padded_up_to_down = -y_padded + np.max(y_padded) y_padded_up_to_down_padded = np.zeros(len(y_padded_up_to_down) + 40) - y_padded_up_to_down_padded[20 : len(y_padded_up_to_down) + 20] = y_padded_up_to_down + y_padded_up_to_down_padded[20: len(y_padded_up_to_down) + 20] = y_padded_up_to_down y_padded_up_to_down_padded = gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus) peaks, _ = find_peaks(y_padded_smoothed, height=0) @@ -139,7 +140,7 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): M = cv2.getRotationMatrix2D(center, -thetha, 1.0) x_d = M[0, 2] y_d = M[1, 2] - + thetha = thetha / 180. * np.pi rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]]) contour_text_interest_copy = contour_text_interest.copy() @@ -164,147 +165,148 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): x = np.array(range(len(y))) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) - if 1>0: + if 1 > 0: try: - y_padded_smoothed_e= gaussian_filter1d(y_padded, 2) - y_padded_up_to_down_e=-y_padded+np.max(y_padded) - y_padded_up_to_down_padded_e=np.zeros(len(y_padded_up_to_down_e)+40) - y_padded_up_to_down_padded_e[20:len(y_padded_up_to_down_e)+20]=y_padded_up_to_down_e - y_padded_up_to_down_padded_e= gaussian_filter1d(y_padded_up_to_down_padded_e, 2) - + y_padded_smoothed_e = gaussian_filter1d(y_padded, 2) + y_padded_up_to_down_e = -y_padded + np.max(y_padded) + y_padded_up_to_down_padded_e = np.zeros(len(y_padded_up_to_down_e) + 40) + y_padded_up_to_down_padded_e[20:len(y_padded_up_to_down_e) + 20] = y_padded_up_to_down_e + y_padded_up_to_down_padded_e = gaussian_filter1d(y_padded_up_to_down_padded_e, 2) peaks_e, _ = find_peaks(y_padded_smoothed_e, height=0) peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) - neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - - arg_neg_must_be_deleted= np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3] - diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) - - arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) - arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1] - - peaks_new=peaks_e[:] - peaks_neg_new=peaks_neg_e[:] - - clusters_to_be_deleted=[] - if len(arg_diff_cluster)>0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1]) - for i in range(len(arg_diff_cluster)-1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1: - arg_diff_cluster[i+1]+1]) - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster)-1]+1:]) - if len(clusters_to_be_deleted)>0: - peaks_new_extra=[] + neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) + + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[ + y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] + diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) + + arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) + arg_diff_cluster = arg_diff[diff_arg_neg_must_be_deleted > 1] + + peaks_new = peaks_e[:] + peaks_neg_new = peaks_neg_e[:] + + clusters_to_be_deleted = [] + if len(arg_diff_cluster) > 0: + clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0] + 1]) + for i in range(len(arg_diff_cluster) - 1): + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1: + arg_diff_cluster[i + 1] + 1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1:]) + if len(clusters_to_be_deleted) > 0: + peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): - min_cluster=np.min(peaks_e[clusters_to_be_deleted[m]]) - max_cluster=np.max(peaks_e[clusters_to_be_deleted[m]]) - peaks_new_extra.append( int( (min_cluster+max_cluster)/2.0) ) + min_cluster = np.min(peaks_e[clusters_to_be_deleted[m]]) + max_cluster = np.max(peaks_e[clusters_to_be_deleted[m]]) + peaks_new_extra.append(int((min_cluster + max_cluster) / 2.0)) for m1 in range(len(clusters_to_be_deleted[m])): - peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]-1]] - peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]]] - peaks_neg_new=peaks_neg_new[peaks_neg_new!=peaks_neg_e[clusters_to_be_deleted[m][m1]]] - peaks_new_tot=[] + peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1] - 1]] + peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1]]] + peaks_neg_new = peaks_neg_new[peaks_neg_new != peaks_neg_e[clusters_to_be_deleted[m][m1]]] + peaks_new_tot = [] for i1 in peaks_new: peaks_new_tot.append(i1) for i1 in peaks_new_extra: peaks_new_tot.append(i1) - peaks_new_tot=np.sort(peaks_new_tot) + peaks_new_tot = np.sort(peaks_new_tot) else: - peaks_new_tot=peaks_e[:] - - textline_con,hierarchy=return_contours_of_image(img_patch) - textline_con_fil=filter_contours_area_of_image(img_patch, - textline_con, hierarchy, - max_area=1, min_area=0.0008) - y_diff_mean=np.mean(np.diff(peaks_new_tot))#self.find_contours_mean_y_diff(textline_con_fil) - sigma_gaus=int( y_diff_mean * (7./40.0) ) + peaks_new_tot = peaks_e[:] + + textline_con, hierarchy = return_contours_of_image(img_patch) + textline_con_fil = filter_contours_area_of_image(img_patch, + textline_con, hierarchy, + max_area=1, min_area=0.0008) + y_diff_mean = np.mean(np.diff(peaks_new_tot)) #self.find_contours_mean_y_diff(textline_con_fil) + sigma_gaus = int(y_diff_mean * (7. / 40.0)) #print(sigma_gaus,'sigma_gaus') except: - sigma_gaus=12 - if sigma_gaus<3: - sigma_gaus=3 + sigma_gaus = 12 + if sigma_gaus < 3: + sigma_gaus = 3 #print(sigma_gaus,'sigma') - y_padded_smoothed= gaussian_filter1d(y_padded, sigma_gaus) - y_padded_up_to_down=-y_padded+np.max(y_padded) - y_padded_up_to_down_padded=np.zeros(len(y_padded_up_to_down)+40) - y_padded_up_to_down_padded[20:len(y_padded_up_to_down)+20]=y_padded_up_to_down - y_padded_up_to_down_padded= gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus) - + y_padded_smoothed = gaussian_filter1d(y_padded, sigma_gaus) + y_padded_up_to_down = -y_padded + np.max(y_padded) + y_padded_up_to_down_padded = np.zeros(len(y_padded_up_to_down) + 40) + y_padded_up_to_down_padded[20:len(y_padded_up_to_down) + 20] = y_padded_up_to_down + y_padded_up_to_down_padded = gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus) + peaks, _ = find_peaks(y_padded_smoothed, height=0) peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0) - + try: - neg_peaks_max=np.max(y_padded_smoothed[peaks]) - arg_neg_must_be_deleted= np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42] - diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) - - arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) - arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1] + neg_peaks_max = np.max(y_padded_smoothed[peaks]) + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[ + y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42] + diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) + + arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) + arg_diff_cluster = arg_diff[diff_arg_neg_must_be_deleted > 1] except: - arg_neg_must_be_deleted=[] - arg_diff_cluster=[] + arg_neg_must_be_deleted = [] + arg_diff_cluster = [] try: - peaks_new=peaks[:] - peaks_neg_new=peaks_neg[:] - clusters_to_be_deleted=[] - - if len(arg_diff_cluster)>=2 and len(arg_diff_cluster)>0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1]) - for i in range(len(arg_diff_cluster)-1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1: - arg_diff_cluster[i+1]+1]) - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster)-1]+1:]) - elif len(arg_neg_must_be_deleted)>=2 and len(arg_diff_cluster)==0: + peaks_new = peaks[:] + peaks_neg_new = peaks_neg[:] + clusters_to_be_deleted = [] + + if len(arg_diff_cluster) >= 2 and len(arg_diff_cluster) > 0: + clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0] + 1]) + for i in range(len(arg_diff_cluster) - 1): + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1: + arg_diff_cluster[i + 1] + 1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1:]) + elif len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) == 0: clusters_to_be_deleted.append(arg_neg_must_be_deleted[:]) - - if len(arg_neg_must_be_deleted)==1: + + if len(arg_neg_must_be_deleted) == 1: clusters_to_be_deleted.append(arg_neg_must_be_deleted) - if len(clusters_to_be_deleted)>0: - peaks_new_extra=[] + if len(clusters_to_be_deleted) > 0: + peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): - min_cluster=np.min(peaks[clusters_to_be_deleted[m]]) - max_cluster=np.max(peaks[clusters_to_be_deleted[m]]) - peaks_new_extra.append( int( (min_cluster+max_cluster)/2.0) ) + min_cluster = np.min(peaks[clusters_to_be_deleted[m]]) + max_cluster = np.max(peaks[clusters_to_be_deleted[m]]) + peaks_new_extra.append(int((min_cluster + max_cluster) / 2.0)) for m1 in range(len(clusters_to_be_deleted[m])): - peaks_new=peaks_new[peaks_new!=peaks[clusters_to_be_deleted[m][m1]-1]] - peaks_new=peaks_new[peaks_new!=peaks[clusters_to_be_deleted[m][m1]]] - peaks_neg_new=peaks_neg_new[peaks_neg_new!=peaks_neg[clusters_to_be_deleted[m][m1]]] - peaks_new_tot=[] + peaks_new = peaks_new[peaks_new != peaks[clusters_to_be_deleted[m][m1] - 1]] + peaks_new = peaks_new[peaks_new != peaks[clusters_to_be_deleted[m][m1]]] + peaks_neg_new = peaks_neg_new[peaks_neg_new != peaks_neg[clusters_to_be_deleted[m][m1]]] + peaks_new_tot = [] for i1 in peaks_new: peaks_new_tot.append(i1) for i1 in peaks_new_extra: peaks_new_tot.append(i1) - peaks_new_tot=np.sort(peaks_new_tot) - + peaks_new_tot = np.sort(peaks_new_tot) + ##plt.plot(y_padded_up_to_down_padded) ##plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*') ##plt.show() - + ##plt.plot(y_padded_up_to_down_padded) ##plt.plot(peaks_neg_new,y_padded_up_to_down_padded[peaks_neg_new],'*') ##plt.show() - + ##plt.plot(y_padded_smoothed) ##plt.plot(peaks,y_padded_smoothed[peaks],'*') ##plt.show() - + ##plt.plot(y_padded_smoothed) ##plt.plot(peaks_new_tot,y_padded_smoothed[peaks_new_tot],'*') ##plt.show() - peaks=peaks_new_tot[:] - peaks_neg=peaks_neg_new[:] + peaks = peaks_new_tot[:] + peaks_neg = peaks_neg_new[:] else: - peaks_new_tot=peaks[:] - peaks=peaks_new_tot[:] - peaks_neg=peaks_neg_new[:] + peaks_new_tot = peaks[:] + peaks = peaks_new_tot[:] + peaks_neg = peaks_neg_new[:] except: pass - - mean_value_of_peaks=np.mean(y_padded_smoothed[peaks]) - std_value_of_peaks=np.std(y_padded_smoothed[peaks]) - peaks_values=y_padded_smoothed[peaks] - + + mean_value_of_peaks = np.mean(y_padded_smoothed[peaks]) + std_value_of_peaks = np.std(y_padded_smoothed[peaks]) + peaks_values = y_padded_smoothed[peaks] + peaks_neg = peaks_neg - 20 - 20 peaks = peaks - 20 for jj in range(len(peaks_neg)): @@ -316,44 +318,46 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): textline_boxes = [] textline_boxes_rot = [] - + if len(peaks_neg) == len(peaks) + 1 and len(peaks) >= 3: for jj in range(len(peaks)): - - if jj==(len(peaks)-1): + + if jj == (len(peaks) - 1): dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) - - if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: + + if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.: point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_down = y_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) else: point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_down = y_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) point_down_narrow = peaks[jj] + first_nonzero + int( 1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2) else: dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) - - if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: + + if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.: point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_down = peaks[jj] + first_nonzero + int( + 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) else: point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_down = peaks[jj] + first_nonzero + int( + 1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) point_down_narrow = peaks[jj] + first_nonzero + int( 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) - if point_down_narrow >= img_patch.shape[0]: point_down_narrow = img_patch.shape[0] - 2 - - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) - for mj in range(len(xv))] + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) + for mj in range(len(xv))] distances = np.array(distances) xvinside = xv[distances >= 0] @@ -374,25 +378,25 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d - - if x_min_rot1<0: - x_min_rot1=0 - if x_min_rot4<0: - x_min_rot4=0 - if point_up_rot1<0: - point_up_rot1=0 - if point_up_rot2<0: - point_up_rot2=0 - - x_min_rot1=x_min_rot1-x_help - x_max_rot2=x_max_rot2-x_help - x_max_rot3=x_max_rot3-x_help - x_min_rot4=x_min_rot4-x_help - - point_up_rot1=point_up_rot1-y_help - point_up_rot2=point_up_rot2-y_help - point_down_rot3=point_down_rot3-y_help - point_down_rot4=point_down_rot4-y_help + + if x_min_rot1 < 0: + x_min_rot1 = 0 + if x_min_rot4 < 0: + x_min_rot4 = 0 + if point_up_rot1 < 0: + point_up_rot1 = 0 + if point_up_rot2 < 0: + point_up_rot2 = 0 + + x_min_rot1 = x_min_rot1 - x_help + x_max_rot2 = x_max_rot2 - x_help + x_max_rot3 = x_max_rot3 - x_help + x_min_rot4 = x_min_rot4 - x_help + + point_up_rot1 = point_up_rot1 - y_help + point_up_rot2 = point_up_rot2 - y_help + point_down_rot3 = point_down_rot3 - y_help + point_down_rot4 = point_down_rot4 - y_help textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], @@ -433,25 +437,25 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d - - if x_min_rot1<0: - x_min_rot1=0 - if x_min_rot4<0: - x_min_rot4=0 - if point_up_rot1<0: - point_up_rot1=0 - if point_up_rot2<0: - point_up_rot2=0 - - x_min_rot1=x_min_rot1-x_help - x_max_rot2=x_max_rot2-x_help - x_max_rot3=x_max_rot3-x_help - x_min_rot4=x_min_rot4-x_help - - point_up_rot1=point_up_rot1-y_help - point_up_rot2=point_up_rot2-y_help - point_down_rot3=point_down_rot3-y_help - point_down_rot4=point_down_rot4-y_help + + if x_min_rot1 < 0: + x_min_rot1 = 0 + if x_min_rot4 < 0: + x_min_rot4 = 0 + if point_up_rot1 < 0: + point_up_rot1 = 0 + if point_up_rot2 < 0: + point_up_rot2 = 0 + + x_min_rot1 = x_min_rot1 - x_help + x_max_rot2 = x_max_rot2 - x_help + x_max_rot3 = x_max_rot3 - x_help + x_min_rot4 = x_min_rot4 - x_help + + point_up_rot1 = point_up_rot1 - y_help + point_up_rot2 = point_up_rot2 - y_help + point_down_rot3 = point_down_rot3 - y_help + point_down_rot4 = point_down_rot4 - y_help textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], @@ -465,21 +469,22 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): dis_to_next = np.abs(peaks[1] - peaks[0]) for jj in range(len(peaks)): if jj == 0: - point_up = 0#peaks[jj] + first_nonzero - int(1. / 1.7 * dis_to_next) + point_up = 0 #peaks[jj] + first_nonzero - int(1. / 1.7 * dis_to_next) if point_up < 0: point_up = 1 - point_down = peaks_neg[1] + first_nonzero# peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next) + point_down = peaks_neg[1] + first_nonzero # peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next) elif jj == 1: - point_down =peaks_neg[1] + first_nonzero# peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next) + point_down = peaks_neg[1] + first_nonzero # peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next) if point_down >= img_patch.shape[0]: point_down = img_patch.shape[0] - 2 try: - point_up = peaks_neg[2] + first_nonzero#peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next) + point_up = peaks_neg[2] + first_nonzero #peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next) except: - point_up =peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next) - + point_up = peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next) + distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -500,25 +505,25 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d - - if x_min_rot1<0: - x_min_rot1=0 - if x_min_rot4<0: - x_min_rot4=0 - if point_up_rot1<0: - point_up_rot1=0 - if point_up_rot2<0: - point_up_rot2=0 - - x_min_rot1=x_min_rot1-x_help - x_max_rot2=x_max_rot2-x_help - x_max_rot3=x_max_rot3-x_help - x_min_rot4=x_min_rot4-x_help - - point_up_rot1=point_up_rot1-y_help - point_up_rot2=point_up_rot2-y_help - point_down_rot3=point_down_rot3-y_help - point_down_rot4=point_down_rot4-y_help + + if x_min_rot1 < 0: + x_min_rot1 = 0 + if x_min_rot4 < 0: + x_min_rot4 = 0 + if point_up_rot1 < 0: + point_up_rot1 = 0 + if point_up_rot2 < 0: + point_up_rot2 = 0 + + x_min_rot1 = x_min_rot1 - x_help + x_max_rot2 = x_max_rot2 - x_help + x_max_rot3 = x_max_rot3 - x_help + x_min_rot4 = x_min_rot4 - x_help + + point_up_rot1 = point_up_rot1 - y_help + point_up_rot2 = point_up_rot2 - y_help + point_down_rot3 = point_down_rot3 - y_help + point_down_rot4 = point_down_rot4 - y_help textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], @@ -552,9 +557,10 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next_up) point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next_down) - + distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -575,25 +581,25 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d - - if x_min_rot1<0: - x_min_rot1=0 - if x_min_rot4<0: - x_min_rot4=0 - if point_up_rot1<0: - point_up_rot1=0 - if point_up_rot2<0: - point_up_rot2=0 - - x_min_rot1=x_min_rot1-x_help - x_max_rot2=x_max_rot2-x_help - x_max_rot3=x_max_rot3-x_help - x_min_rot4=x_min_rot4-x_help - - point_up_rot1=point_up_rot1-y_help - point_up_rot2=point_up_rot2-y_help - point_down_rot3=point_down_rot3-y_help - point_down_rot4=point_down_rot4-y_help + + if x_min_rot1 < 0: + x_min_rot1 = 0 + if x_min_rot4 < 0: + x_min_rot4 = 0 + if point_up_rot1 < 0: + point_up_rot1 = 0 + if point_up_rot2 < 0: + point_up_rot2 = 0 + + x_min_rot1 = x_min_rot1 - x_help + x_max_rot2 = x_max_rot2 - x_help + x_max_rot3 = x_max_rot3 - x_help + x_min_rot4 = x_min_rot4 - x_help + + point_up_rot1 = point_up_rot1 - y_help + point_up_rot2 = point_up_rot2 - y_help + point_down_rot3 = point_down_rot3 - y_help + point_down_rot4 = point_down_rot4 - y_help textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], @@ -626,7 +632,8 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): neg_peaks_max = np.max(y_padded_up_to_down_padded[peaks_neg]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42] + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[ + y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -637,11 +644,11 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): clusters_to_be_deleted = [] if len(arg_diff_cluster) >= 2 and len(arg_diff_cluster) > 0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[0: arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1: arg_diff_cluster[i + 1] + 1]) - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1:]) elif len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) == 0: clusters_to_be_deleted.append(arg_neg_must_be_deleted[:]) if len(arg_neg_must_be_deleted) == 1: @@ -704,24 +711,30 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) - point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + point_down_narrow = peaks[jj] + first_nonzero + int( + 1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2) else: dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0: point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_down = peaks[jj] + first_nonzero + int( + 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) else: point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_down = peaks[jj] + first_nonzero + int( + 1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) - point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + point_down_narrow = peaks[jj] + first_nonzero + int( + 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) if point_down_narrow >= img_patch.shape[0]: point_down_narrow = img_patch.shape[0] - 2 - - distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) for mj in range(len(xv))] + + distances = [cv2.pointPolygonTest(contour_text_interest_copy, + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) xvinside = xv[distances >= 0] @@ -808,9 +821,10 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): if point_down >= img_patch.shape[0]: point_down = img_patch.shape[0] - 2 point_up = peaks[jj] + first_nonzero - int(1.0 / 1.8 * dis_to_next) - + distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -873,9 +887,10 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): point_up = peaks[jj] + first_nonzero - int(1.0 / 1.9 * dis_to_next_up) point_down = peaks[jj] + first_nonzero + int(1.0 / 1.9 * dis_to_next_down) - + distances = [cv2.pointPolygonTest(contour_text_interest_copy, - tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True) + tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), + True) for mj in range(len(xv))] distances = np.array(distances) @@ -944,7 +959,7 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): y = textline_patch_sum_along_width[:] # [first_nonzero:last_nonzero] y_padded = np.zeros(len(y) + 40) - y_padded[20 : len(y) + 20] = y + y_padded[20: len(y) + 20] = y x = np.array(range(len(y))) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) @@ -953,14 +968,15 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): y_padded_smoothed_e = gaussian_filter1d(y_padded, 2) y_padded_up_to_down_e = -y_padded + np.max(y_padded) y_padded_up_to_down_padded_e = np.zeros(len(y_padded_up_to_down_e) + 40) - y_padded_up_to_down_padded_e[20 : len(y_padded_up_to_down_e) + 20] = y_padded_up_to_down_e + y_padded_up_to_down_padded_e[20: len(y_padded_up_to_down_e) + 20] = y_padded_up_to_down_e y_padded_up_to_down_padded_e = gaussian_filter1d(y_padded_up_to_down_padded_e, 2) peaks_e, _ = find_peaks(y_padded_smoothed_e, height=0) peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] + arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[ + y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -971,10 +987,11 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): clusters_to_be_deleted = [] if len(arg_diff_cluster) > 0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[0: arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1]) - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) + clusters_to_be_deleted.append( + arg_neg_must_be_deleted[arg_diff_cluster[i] + 1: arg_diff_cluster[i + 1] + 1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1:]) if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] for m in range(len(clusters_to_be_deleted)): @@ -1011,7 +1028,7 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): y_padded_smoothed = gaussian_filter1d(y_padded, sigma_gaus) y_padded_up_to_down = -y_padded + np.max(y_padded) y_padded_up_to_down_padded = np.zeros(len(y_padded_up_to_down) + 40) - y_padded_up_to_down_padded[20 : len(y_padded_up_to_down) + 20] = y_padded_up_to_down + y_padded_up_to_down_padded[20: len(y_padded_up_to_down) + 20] = y_padded_up_to_down y_padded_up_to_down_padded = gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus) peaks, _ = find_peaks(y_padded_smoothed, height=0) @@ -1023,7 +1040,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): try: neg_peaks_max = np.max(y_padded_smoothed[peaks]) - arg_neg_must_be_deleted = np.arange(len(peaks_neg))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24] + arg_neg_must_be_deleted = np.arange(len(peaks_neg))[ + y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24] diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted) arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted))) @@ -1031,11 +1049,11 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): clusters_to_be_deleted = [] if len(arg_diff_cluster) >= 2 and len(arg_diff_cluster) > 0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[0: arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1: arg_diff_cluster[i + 1] + 1]) - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1:]) elif len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) == 0: clusters_to_be_deleted.append(arg_neg_must_be_deleted[:]) if len(arg_neg_must_be_deleted) == 1: @@ -1098,7 +1116,7 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): # print(peaks_neg_true) for i in range(len(peaks_neg_true)): - img_patch[peaks_neg_true[i] - 6 : peaks_neg_true[i] + 6, :] = 0 + img_patch[peaks_neg_true[i] - 6: peaks_neg_true[i] + 6, :] = 0 else: pass @@ -1108,7 +1126,7 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): for i in range(len(peaks_pos_true)): ##img_patch[peaks_pos_true[i]-8:peaks_pos_true[i]+8,:]=1 - img_patch[peaks_pos_true[i] - 6 : peaks_pos_true[i] + 6, :] = 1 + img_patch[peaks_pos_true[i] - 6: peaks_pos_true[i] + 6, :] = 1 else: pass kernel = np.ones((5, 5), np.uint8) @@ -1118,6 +1136,7 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): img_patch = cv2.erode(img_patch, kernel, iterations=1) return img_patch + def separate_lines_new_inside_tiles(img_path, thetha): (h, w) = img_path.shape[:2] center = (w // 2, h // 2) @@ -1144,7 +1163,7 @@ def separate_lines_new_inside_tiles(img_path, thetha): y = mada_n[:] # [first_nonzero:last_nonzero] y_help = np.zeros(len(y) + 40) - y_help[20 : len(y) + 20] = y + y_help[20: len(y) + 20] = y x = np.array(range(len(y))) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) @@ -1156,7 +1175,7 @@ def separate_lines_new_inside_tiles(img_path, thetha): z = gaussian_filter1d(y_help, sigma_gaus) zneg_rev = -y_help + np.max(y_help) zneg = np.zeros(len(zneg_rev) + 40) - zneg[20 : len(zneg_rev) + 20] = zneg_rev + zneg[20: len(zneg_rev) + 20] = zneg_rev zneg = gaussian_filter1d(zneg, sigma_gaus) peaks, _ = find_peaks(z, height=0) @@ -1240,7 +1259,7 @@ def separate_lines_new_inside_tiles(img_path, thetha): # print(peaks_neg_true) for i in range(len(peaks_neg_true)): - img_path[peaks_neg_true[i] - 6 : peaks_neg_true[i] + 6, :] = 0 + img_path[peaks_neg_true[i] - 6: peaks_neg_true[i] + 6, :] = 0 else: pass @@ -1250,7 +1269,7 @@ def separate_lines_new_inside_tiles(img_path, thetha): peaks_pos_true = peaks_pos_true - 20 for i in range(len(peaks_pos_true)): - img_path[peaks_pos_true[i] - 8 : peaks_pos_true[i] + 8, :] = 1 + img_path[peaks_pos_true[i] - 8: peaks_pos_true[i] + 8, :] = 1 else: pass kernel = np.ones((5, 5), np.uint8) @@ -1305,7 +1324,8 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i return None, cont_final -def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False): +def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, + add_boxes_coor_into_textlines=False): textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 textline_mask = textline_mask.astype(np.uint8) kernel = np.ones((5, 5), np.uint8) @@ -1332,8 +1352,8 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest textline_mask_help = np.zeros((textline_mask.shape[0] + int(2 * y_help), textline_mask.shape[1] + int(2 * x_help), 3)) - textline_mask_help[y_help : y_help + textline_mask.shape[0], - x_help : x_help + textline_mask.shape[1], :] = np.copy(textline_mask[:, :, :]) + textline_mask_help[y_help: y_help + textline_mask.shape[0], + x_help: x_help + textline_mask.shape[1], :] = np.copy(textline_mask[:, :, :]) dst = rotate_image(textline_mask_help, slope) dst = dst[:, :, 0] @@ -1356,8 +1376,8 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest img_contour_help = np.zeros((img_contour.shape[0] + int(2 * y_help), img_contour.shape[1] + int(2 * x_help), 3)) - img_contour_help[y_help : y_help + img_contour.shape[0], - x_help : x_help + img_contour.shape[1], :] = np.copy(img_contour[:, :, :]) + img_contour_help[y_help: y_help + img_contour.shape[0], + x_help: x_help + img_contour.shape[1], :] = np.copy(img_contour[:, :, :]) img_contour_rot = rotate_image(img_contour_help, slope) # plt.imshow(img_contour_rot_help) @@ -1497,11 +1517,13 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl img_patch_separated_returned[:, :][img_patch_separated_returned[:, :] != 0] = 1 img_patch_separated_returned_true_size = img_patch_separated_returned[ - int(img_int.shape[0] * 0.1): int(img_int.shape[0] * 0.1) + img_int.shape[0], - int(img_int.shape[1] * 1.0): int(img_int.shape[1] * 1.0) + img_int.shape[1]] + int(img_int.shape[0] * 0.1): int(img_int.shape[0] * 0.1) + + img_int.shape[0], + int(img_int.shape[1] * 1.0): int(img_int.shape[1] * 1.0) + + img_int.shape[1]] - img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin] - img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size + img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin: length_x - margin] + img_patch_ineterst_revised[:, index_x_d + margin: index_x_u - margin] = img_patch_separated_returned_true_size # plt.imshow(img_patch_ineterst_revised) # plt.show() @@ -1512,7 +1534,7 @@ def do_image_rotation(angle, img, sigma_des, logger=None): if logger is None: logger = getLogger(__package__) img_rot = rotate_image(img, angle) - img_rot[img_rot!=0] = 1 + img_rot[img_rot != 0] = 1 try: var = find_num_col_deskew(img_rot, sigma_des, 20.3) except: @@ -1521,23 +1543,23 @@ def do_image_rotation(angle, img, sigma_des, logger=None): return var -def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, +def return_deskew_slop(img_patch_org, sigma_des, n_tot_angles=100, main_page=False, logger=None, plotter=None, map=map): if main_page and plotter: plotter.save_plot_of_textline_density(img_patch_org) - img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1])) - img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0] + img_int = np.zeros((img_patch_org.shape[0], img_patch_org.shape[1])) + img_int[:, :] = img_patch_org[:, :] #img_patch_org[:,:,0] - max_shape=np.max(img_int.shape) - img_resized=np.zeros((int(max_shape * 1.1) , int(max_shape * 1.1))) + max_shape = np.max(img_int.shape) + img_resized = np.zeros((int(max_shape * 1.1), int(max_shape * 1.1))) - onset_x=int((img_resized.shape[1]-img_int.shape[1])/2.) - onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.) + onset_x = int((img_resized.shape[1] - img_int.shape[1]) / 2.) + onset_y = int((img_resized.shape[0] - img_int.shape[0]) / 2.) #img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) )) #img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:] - img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:] + img_resized[onset_y:onset_y + img_int.shape[0], onset_x:onset_x + img_int.shape[1]] = img_int[:, :] #print(img_resized.shape,'img_resizedshape') #plt.imshow(img_resized) @@ -1545,7 +1567,7 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]: #plt.imshow(img_resized) #plt.show() - angles = np.array([-45, 0, 45, 90,]) + angles = np.array([-45, 0, 45, 90, ]) angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) @@ -1553,10 +1575,10 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, elif main_page: #plt.imshow(img_resized) #plt.show() - angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) + angles = np.linspace(-12, 12, n_tot_angles) #np.array([0 , 45 , 90 , -45]) angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) - early_slope_edge=11 + early_slope_edge = 11 if abs(angle) > early_slope_edge: if angle < 0: angles = np.linspace(-90, -12, n_tot_angles) @@ -1567,7 +1589,7 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10) angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) - early_slope_edge=22 + early_slope_edge = 22 if abs(angle) > early_slope_edge: if angle < 0: angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10) @@ -1608,13 +1630,13 @@ def do_work_of_slopes_new( x, y, w, h = box_text _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) mask_textline = np.zeros(textline_mask_tot_ea.shape) - mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) + mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1, 1, 1)) all_text_region_raw = textline_mask_tot_ea * mask_textline all_text_region_raw = all_text_region_raw[y: y + h, x: x + w].astype(np.uint8) - img_int_p = all_text_region_raw[:,:] + img_int_p = all_text_region_raw[:, :] img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2) - if img_int_p.shape[0] /img_int_p.shape[1] < 0.1: + if img_int_p.shape[0] / img_int_p.shape[1] < 0.1: slope = 0 slope_for_all = slope_deskew all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w] @@ -1715,11 +1737,11 @@ def do_work_of_slopes_new_curved( x, y, w, h = cv2.boundingRect(contour_par) mask_biggest = np.zeros(mask_texts_only.shape) mask_biggest = cv2.fillPoly(mask_biggest, pts=[contour_par], color=(1, 1, 1)) - mask_region_in_patch_region = mask_biggest[y : y + h, x : x + w] + mask_region_in_patch_region = mask_biggest[y: y + h, x: x + w] textline_biggest_region = mask_biggest * textline_mask_tot_ea # print(slope_for_all,'slope_for_all') - textline_rotated_separated = separate_lines_new2(textline_biggest_region[y: y+h, x: x+w], 0, + textline_rotated_separated = separate_lines_new2(textline_biggest_region[y: y + h, x: x + w], 0, num_col, slope_for_all, logger=logger, plotter=plotter) @@ -1728,7 +1750,7 @@ def do_work_of_slopes_new_curved( textline_rotated_separated[mask_region_in_patch_region[:, :] != 1] = 0 # till here - textline_region_in_image[y : y + h, x : x + w] = textline_rotated_separated + textline_region_in_image[y: y + h, x: x + w] = textline_rotated_separated # plt.imshow(textline_region_in_image) # plt.show() @@ -1746,14 +1768,16 @@ def do_work_of_slopes_new_curved( mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4) pixel_img = 1 - mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par)) + mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), + int(mask_biggest2.shape[1] * scale_par)) cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img) try: textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0]) except Exception as why: logger.error(why) else: - textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, True) + textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, + box_text, True) # print(np.shape(textlines_cnt_per_region),'textlines_cnt_per_region') return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope @@ -1771,7 +1795,7 @@ def do_work_of_slopes_new_light( x, y, w, h = box_text _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) mask_textline = np.zeros(textline_mask_tot_ea.shape) - mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) + mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1, 1, 1)) all_text_region_raw = textline_mask_tot_ea * mask_textline all_text_region_raw = all_text_region_raw[y: y + h, x: x + w].astype(np.uint8) diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index ae12725..369b6e8 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -8,23 +8,23 @@ from .utils.counter import EynollahIdCounter from ocrd_utils import getLogger from ocrd_models.ocrd_page import ( - BorderType, - CoordsType, - PcGtsType, - TextLineType, - TextEquivType, - TextRegionType, - ImageRegionType, - TableRegionType, - SeparatorRegionType, - to_xml - ) + BorderType, + CoordsType, + PcGtsType, + TextLineType, + TextEquivType, + TextRegionType, + ImageRegionType, + TableRegionType, + SeparatorRegionType, + to_xml +) import numpy as np class EynollahXmlWriter: - def __init__(self, *, dir_out, image_filename, curved_line,textline_light, pcgts=None): + def __init__(self, *, dir_out, image_filename, curved_line, textline_light, pcgts=None): self.logger = getLogger('eynollah.writer') self.counter = EynollahIdCounter() self.dir_out = dir_out @@ -33,10 +33,10 @@ class EynollahXmlWriter: self.curved_line = curved_line self.textline_light = textline_light self.pcgts = pcgts - self.scale_x = None # XXX set outside __init__ - self.scale_y = None # XXX set outside __init__ - self.height_org = None # XXX set outside __init__ - self.width_org = None # XXX set outside __init__ + self.scale_x = None # XXX set outside __init__ + self.scale_y = None # XXX set outside __init__ + self.height_org = None # XXX set outside __init__ + self.width_org = None # XXX set outside __init__ @property def image_filename_stem(self): @@ -53,11 +53,12 @@ class EynollahXmlWriter: else: points_page_print += str(int((contour[0][0]) / self.scale_x)) points_page_print += ',' - points_page_print += str(int((contour[0][1] ) / self.scale_y)) + points_page_print += str(int((contour[0][1]) / self.scale_y)) points_page_print = points_page_print + ' ' return points_page_print[:-1] - def serialize_lines_in_marginal(self, marginal_region, all_found_textline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, counter): + def serialize_lines_in_marginal(self, marginal_region, all_found_textline_polygons_marginals, marginal_idx, + page_coord, all_box_coord_marginals, slopes_marginals, counter): for j in range(len(all_found_textline_polygons_marginals[marginal_idx])): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) @@ -67,43 +68,60 @@ class EynollahXmlWriter: for l in range(len(all_found_textline_polygons_marginals[marginal_idx][j])): if not (self.curved_line or self.textline_light): if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2: - textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) ) - textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) ) + textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + + all_box_coord_marginals[marginal_idx][2] + page_coord[ + 2]) / self.scale_x)) + textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + + all_box_coord_marginals[marginal_idx][0] + page_coord[ + 0]) / self.scale_y)) else: - textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) ) - textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) ) + textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + + all_box_coord_marginals[marginal_idx][2] + page_coord[ + 2]) / self.scale_x)) + textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + + all_box_coord_marginals[marginal_idx][0] + page_coord[ + 0]) / self.scale_y)) points_co += str(textline_x_coord) points_co += ',' points_co += str(textline_y_coord) if (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) <= 45: if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2: - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + page_coord[ + 2]) / self.scale_x)) points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + page_coord[0]) / self.scale_y)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + page_coord[ + 0]) / self.scale_y)) else: - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + page_coord[2]) / self.scale_x)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + page_coord[0]) / self.scale_y)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + + page_coord[0]) / self.scale_y)) elif (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) > 45: if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2: - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y)) else: - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y)) points_co += ' ' coords.set_points(points_co[:-1]) - def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): + def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, + slopes, counter, ocr_all_textlines_textregion): self.logger.debug('enter serialize_lines_in_region') for j in range(len(all_found_textline_polygons[region_idx])): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) if ocr_all_textlines_textregion: - textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) + textline.set_TextEquiv([TextEquivType(Unicode=ocr_all_textlines_textregion[j])]) text_region.add_TextLine(textline) text_region.set_orientation(-slopes[region_idx]) region_bboxes = all_box_coord[region_idx] @@ -111,11 +129,15 @@ class EynollahXmlWriter: for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[region_idx][j]): if not (self.curved_line or self.textline_light): if len(contour_textline) == 2: - textline_x_coord = max(0, int((contour_textline[0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) - textline_y_coord = max(0, int((contour_textline[1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) + textline_x_coord = max(0, int((contour_textline[0] + region_bboxes[2] + page_coord[ + 2]) / self.scale_x)) + textline_y_coord = max(0, int((contour_textline[1] + region_bboxes[0] + page_coord[ + 0]) / self.scale_y)) else: - textline_x_coord = max(0, int((contour_textline[0][0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) - textline_y_coord = max(0, int((contour_textline[0][1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) + textline_x_coord = max(0, int((contour_textline[0][0] + region_bboxes[2] + page_coord[ + 2]) / self.scale_x)) + textline_y_coord = max(0, int((contour_textline[0][1] + region_bboxes[0] + page_coord[ + 0]) / self.scale_y)) points_co += str(textline_x_coord) points_co += ',' points_co += str(textline_y_coord) @@ -128,26 +150,29 @@ class EynollahXmlWriter: else: points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y)) + points_co += str(int((contour_textline[0][1] + page_coord[0]) / self.scale_y)) elif (self.curved_line or self.textline_light) and np.abs(slopes[region_idx]) > 45: - if len(contour_textline)==2: - points_co += str(int((contour_textline[0] + region_bboxes[2] + page_coord[2])/self.scale_x)) + if len(contour_textline) == 2: + points_co += str(int((contour_textline[0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((contour_textline[1] + region_bboxes[0] + page_coord[0])/self.scale_y)) + points_co += str(int((contour_textline[1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) else: - points_co += str(int((contour_textline[0][0] + region_bboxes[2]+page_coord[2])/self.scale_x)) + points_co += str( + int((contour_textline[0][0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((contour_textline[0][1] + region_bboxes[0]+page_coord[0])/self.scale_y)) + points_co += str( + int((contour_textline[0][1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) points_co += ' ' coords.set_points(points_co[:-1]) - - def serialize_lines_in_dropcapital(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): + + def serialize_lines_in_dropcapital(self, text_region, all_found_textline_polygons, region_idx, page_coord, + all_box_coord, slopes, counter, ocr_all_textlines_textregion): self.logger.debug('enter serialize_lines_in_region') for j in range(1): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) if ocr_all_textlines_textregion: - textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) + textline.set_TextEquiv([TextEquivType(Unicode=ocr_all_textlines_textregion[j])]) text_region.add_TextLine(textline) #region_bboxes = all_box_coord[region_idx] points_co = '' @@ -159,7 +184,7 @@ class EynollahXmlWriter: else: points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y)) + points_co += str(int((contour_textline[0][1] + page_coord[0]) / self.scale_y)) points_co += ' ' coords.set_points(points_co[:-1]) @@ -169,7 +194,11 @@ class EynollahXmlWriter: with open(self.output_filename, 'w') as f: f.write(to_xml(pcgts)) - def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines): + def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, + all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, + found_polygons_marginals, all_found_textline_polygons_marginals, + all_box_coord_marginals, slopes, slopes_marginals, cont_page, + polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines): self.logger.debug('enter build_pagexml_no_full_layout') # create the file structure @@ -185,20 +214,26 @@ class EynollahXmlWriter: for mm in range(len(found_polygons_text_region)): textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord)), - ) + Coords=CoordsType( + points=self.calculate_polygon_coords(found_polygons_text_region[mm], + page_coord)), + ) page.add_TextRegion(textregion) if ocr_all_textlines: ocr_textlines = ocr_all_textlines[mm] else: ocr_textlines = None - self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines) + self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, + slopes, counter, ocr_textlines) for mm in range(len(found_polygons_marginals)): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))) + Coords=CoordsType( + points=self.calculate_polygon_coords(found_polygons_marginals[mm], + page_coord))) page.add_TextRegion(marginal) - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter) + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, + all_box_coord_marginals, slopes_marginals, counter) for mm in range(len(found_polygons_text_region_img)): img_region = ImageRegionType(id=counter.next_region_id, Coords=CoordsType()) @@ -206,27 +241,29 @@ class EynollahXmlWriter: points_co = '' for lmm in range(len(found_polygons_text_region_img[mm])): try: - points_co += str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x)) + points_co += str( + int((found_polygons_text_region_img[mm][lmm, 0, 0] + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y)) + points_co += str( + int((found_polygons_text_region_img[mm][lmm, 0, 1] + page_coord[0]) / self.scale_y)) points_co += ' ' except: - points_co += str(int((found_polygons_text_region_img[mm][lmm][0] + page_coord[2])/ self.scale_x )) + points_co += str(int((found_polygons_text_region_img[mm][lmm][0] + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((found_polygons_text_region_img[mm][lmm][1] + page_coord[0])/ self.scale_y )) + points_co += str(int((found_polygons_text_region_img[mm][lmm][1] + page_coord[0]) / self.scale_y)) points_co += ' ' - + img_region.get_Coords().set_points(points_co[:-1]) - + for mm in range(len(polygons_lines_to_be_written_in_xml)): sep_hor = SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType()) page.add_SeparatorRegion(sep_hor) points_co = '' for lmm in range(len(polygons_lines_to_be_written_in_xml[mm])): - points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm,0,0] ) / self.scale_x)) + points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm, 0, 0]) / self.scale_x)) points_co += ',' - points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm,0,1] ) / self.scale_y)) + points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm, 0, 1]) / self.scale_y)) points_co += ' ' sep_hor.get_Coords().set_points(points_co[:-1]) for mm in range(len(found_polygons_tables)): @@ -234,15 +271,21 @@ class EynollahXmlWriter: page.add_TableRegion(tab_region) points_co = '' for lmm in range(len(found_polygons_tables[mm])): - points_co += str(int((found_polygons_tables[mm][lmm,0,0] + page_coord[2]) / self.scale_x)) + points_co += str(int((found_polygons_tables[mm][lmm, 0, 0] + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((found_polygons_tables[mm][lmm,0,1] + page_coord[0]) / self.scale_y)) + points_co += str(int((found_polygons_tables[mm][lmm, 0, 1] + page_coord[0]) / self.scale_y)) points_co += ' ' tab_region.get_Coords().set_points(points_co[:-1]) return pcgts - def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines): + def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, + order_of_texts, id_of_texts, all_found_textline_polygons, + all_found_textline_polygons_h, all_box_coord, all_box_coord_h, + found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, + found_polygons_marginals, all_found_textline_polygons_marginals, + all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, + polygons_lines_to_be_written_in_xml, ocr_all_textlines): self.logger.debug('enter build_pagexml_full_layout') # create the file structure @@ -257,49 +300,63 @@ class EynollahXmlWriter: for mm in range(len(found_polygons_text_region)): textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord))) + Coords=CoordsType( + points=self.calculate_polygon_coords(found_polygons_text_region[mm], + page_coord))) page.add_TextRegion(textregion) - + if ocr_all_textlines: ocr_textlines = ocr_all_textlines[mm] else: ocr_textlines = None - self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines) + self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, + slopes, counter, ocr_textlines) self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) for mm in range(len(found_polygons_text_region_h)): textregion = TextRegionType(id=counter.next_region_id, type_='header', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) + Coords=CoordsType( + points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], + page_coord))) page.add_TextRegion(textregion) if ocr_all_textlines: ocr_textlines = ocr_all_textlines[mm] else: ocr_textlines = None - self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter, ocr_textlines) + self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, + slopes_h, counter, ocr_textlines) for mm in range(len(found_polygons_marginals)): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))) + Coords=CoordsType( + points=self.calculate_polygon_coords(found_polygons_marginals[mm], + page_coord))) page.add_TextRegion(marginal) - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter) + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, + all_box_coord_marginals, slopes_marginals, counter) for mm in range(len(found_polygons_drop_capitals)): dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord))) + Coords=CoordsType( + points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], + page_coord))) page.add_TextRegion(dropcapital) ###all_box_coord_drop = None ###slopes_drop = None ###self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None) for mm in range(len(found_polygons_text_region_img)): - page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) - + page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType( + points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) + for mm in range(len(polygons_lines_to_be_written_in_xml)): - page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0])))) - + page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType( + points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0, 0, 0, 0])))) + for mm in range(len(found_polygons_tables)): - page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord)))) + page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType( + points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord)))) return pcgts @@ -315,6 +372,5 @@ class EynollahXmlWriter: coords += str(int((value_bbox[0][0] + page_coord[2]) / self.scale_x)) coords += ',' coords += str(int((value_bbox[0][1] + page_coord[0]) / self.scale_y)) - coords=coords + ' ' + coords = coords + ' ' return coords[:-1] -