# pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring """ tool to extract table form data from alto xml data """ import math import os import sys import time import warnings from pathlib import Path from multiprocessing import Process, Queue, cpu_count from lxml import etree as ET from ocrd_utils import getLogger import cv2 import numpy as np os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" stderr = sys.stderr sys.stderr = open(os.devnull, "w") from keras import backend as K from keras.models import load_model sys.stderr = stderr import tensorflow as tf tf.get_logger().setLevel("ERROR") warnings.filterwarnings("ignore") from .utils.contour import ( filter_contours_area_of_image, find_contours_mean_y_diff, find_new_features_of_contoures, get_text_region_boxes_by_given_contours, get_textregion_contours_in_org_image, return_contours_of_image, return_contours_of_interested_region, return_contours_of_interested_region_by_min_size, return_contours_of_interested_textline, return_parent_contours, return_contours_of_interested_region_by_size, ) from .utils.rotate import ( rotate_image, rotation_not_90_func, rotation_not_90_func_full_layout ) from .utils.separate_lines import ( textline_contours_postprocessing, seperate_lines_new2, return_deskew_slop, ) from .utils.drop_capitals import ( adhere_drop_capital_region_into_cprresponding_textline, filter_small_drop_capitals_from_no_patch_layout ) from .utils.marginals import get_marginals from .utils.resize import resize_image from .utils import ( boosting_headers_by_longshot_region_segmentation, crop_image_inside_box, find_num_col, otsu_copy, otsu_copy_binary, delete_seperator_around, return_regions_without_seperators, put_drop_out_from_only_drop_model, putt_bb_of_drop_capitals_of_model_in_patches_in_layout, check_any_text_region_in_model_one_is_main_or_header, small_textlines_to_parent_adherence2, order_and_id_of_texts, order_of_regions, implent_law_head_main_not_parallel, return_hor_spliter_by_index, combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new, return_points_with_boundies, find_number_of_columns_in_document, return_boxes_of_images_by_order_of_reading_new, ) from .utils.xml import create_page_xml, add_textequiv from .utils.pil_cv2 import check_dpi from .plot import EynollahPlotter SLOPE_THRESHOLD = 0.13 RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45: DPI_THRESHOLD = 298 MAX_SLOPE = 999 KERNEL = np.ones((5, 5), np.uint8) class eynollah: def __init__( self, image_filename, image_filename_stem, dir_out, dir_models, dir_of_cropped_images=None, dir_of_layout=None, dir_of_deskewed=None, dir_of_all=None, enable_plotting=False, allow_enhancement=False, curved_line=False, full_layout=False, allow_scaling=False, headers_off=False ): self.image_filename = image_filename # XXX This does not seem to be a directory as the name suggests, but a file self.cont_page = [] self.dir_out = dir_out self.image_filename_stem = image_filename_stem self.allow_enhancement = allow_enhancement self.curved_line = curved_line self.full_layout = full_layout self.allow_scaling = allow_scaling self.headers_off = headers_off if not self.image_filename_stem: self.image_filename_stem = Path(Path(image_filename).name).stem self.plotter = None if not enable_plotting else EynollahPlotter( dir_of_all=dir_of_all, dir_of_deskewed=dir_of_deskewed, dir_of_cropped_images=dir_of_cropped_images, dir_of_layout=dir_of_layout, image_filename=image_filename, image_filename_stem=image_filename_stem, ) self.logger = getLogger('eynollah') self.dir_models = dir_models self.model_dir_of_enhancemnet = dir_models + "/model_enhancement.h5" self.model_dir_of_col_classifier = dir_models + "/model_scale_classifier.h5" self.model_region_dir_p = dir_models + "/model_main_covid19_lr5-5_scale_1_1_great.h5" self.model_region_dir_p2 = dir_models + "/model_main_home_corona3_rot.h5" self.model_region_dir_fully_np = dir_models + "/model_no_patches_class0_30eopch.h5" self.model_region_dir_fully = dir_models + "/model_3up_new_good_no_augmentation.h5" self.model_page_dir = dir_models + "/model_page_mixed_best.h5" self.model_region_dir_p_ens = dir_models + "/model_ensemble_s.h5" self.model_textline_dir = dir_models + "/model_textline_newspapers.h5" self._imgs = {} def imread(self, grayscale=False, uint8=True): key = 'img' if grayscale: key += '_grayscale' if uint8: key += '_uint8' if key not in self._imgs: if grayscale: img = cv2.imread(self.image_filename, cv2.IMREAD_GRAYSCALE) else: img = cv2.imread(self.image_filename) if uint8: img = img.astype(np.uint8) self._imgs[key] = img return self._imgs[key].copy() def predict_enhancement(self, img): self.logger.debug("enter predict_enhancement") model_enhancement, _ = self.start_new_session_and_model(self.model_dir_of_enhancemnet) img_height_model = model_enhancement.layers[len(model_enhancement.layers) - 1].output_shape[1] img_width_model = model_enhancement.layers[len(model_enhancement.layers) - 1].output_shape[2] if img.shape[0] < img_height_model: img = cv2.resize(img, (img.shape[1], img_width_model), interpolation=cv2.INTER_NEAREST) if img.shape[1] < img_width_model: img = cv2.resize(img, (img_height_model, img.shape[0]), interpolation=cv2.INTER_NEAREST) margin = int(0 * img_width_model) width_mid = img_width_model - 2 * margin height_mid = img_height_model - 2 * margin img = img / float(255.0) img_h = img.shape[0] img_w = img.shape[1] prediction_true = np.zeros((img_h, img_w, 3)) nxf = img_w / float(width_mid) nyf = img_h / float(height_mid) nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) for i in range(nxf): for j in range(nyf): if i == 0: index_x_d = i * width_mid index_x_u = index_x_d + img_width_model else: index_x_d = i * width_mid index_x_u = index_x_d + img_width_model if j == 0: index_y_d = j * height_mid index_y_u = index_y_d + img_height_model else: index_y_d = j * height_mid index_y_u = index_y_d + img_height_model if index_x_u > img_w: index_x_u = img_w index_x_d = img_w - img_width_model if index_y_u > img_h: index_y_u = img_h index_y_d = img_h - img_height_model img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] label_p_pred = model_enhancement.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) seg = label_p_pred[0, :, :, :] seg = seg * 255 if i == 0 and j == 0: seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg elif i == nxf - 1 and j == nyf - 1: seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg elif i == 0 and j == nyf - 1: seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg elif i == nxf - 1 and j == 0: seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg elif i == 0 and j != 0 and j != nyf - 1: seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg elif i == nxf - 1 and j != 0 and j != nyf - 1: seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg elif i != 0 and i != nxf - 1 and j == 0: seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg elif i != 0 and i != nxf - 1 and j == nyf - 1: seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg else: seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg prediction_true = prediction_true.astype(int) return prediction_true def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred): self.logger.debug("enter calculate_width_height_by_columns") if num_col == 1 and width_early < 1100: img_w_new = 2000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 2000) elif num_col == 1 and width_early >= 2500: img_w_new = 2000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 2000) elif num_col == 1 and width_early >= 1100 and width_early < 2500: img_w_new = width_early img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) elif num_col == 2 and width_early < 2000: img_w_new = 2400 img_h_new = int(img.shape[0] / float(img.shape[1]) * 2400) elif num_col == 2 and width_early >= 3500: img_w_new = 2400 img_h_new = int(img.shape[0] / float(img.shape[1]) * 2400) elif num_col == 2 and width_early >= 2000 and width_early < 3500: img_w_new = width_early img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) elif num_col == 3 and width_early < 2000: img_w_new = 3000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 3000) elif num_col == 3 and width_early >= 4000: img_w_new = 3000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 3000) elif num_col == 3 and width_early >= 2000 and width_early < 4000: img_w_new = width_early img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) elif num_col == 4 and width_early < 2500: img_w_new = 4000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 4000) elif num_col == 4 and width_early >= 5000: img_w_new = 4000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 4000) elif num_col == 4 and width_early >= 2500 and width_early < 5000: img_w_new = width_early img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) elif num_col == 5 and width_early < 3700: img_w_new = 5000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 5000) elif num_col == 5 and width_early >= 7000: img_w_new = 5000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 5000) elif num_col == 5 and width_early >= 3700 and width_early < 7000: img_w_new = width_early img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) elif num_col == 6 and width_early < 4500: img_w_new = 6500 # 5400 img_h_new = int(img.shape[0] / float(img.shape[1]) * 6500) else: img_w_new = width_early img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: img_new = np.copy(img) num_column_is_classified = False else: img_new = resize_image(img, img_h_new, img_w_new) num_column_is_classified = True return img_new, num_column_is_classified def resize_image_with_column_classifier(self, is_image_enhanced): self.logger.debug("enter resize_image_with_column_classifier") img = self.imread() _, page_coord = self.early_page_for_num_of_column_classification() model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) img_1ch = self.imread(grayscale=True, uint8=False) width_early = img_1ch.shape[1] img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] # plt.imshow(img_1ch) # plt.show() img_1ch = img_1ch / 255.0 img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) img_in[0, :, :, 0] = img_1ch[:, :] img_in[0, :, :, 1] = img_1ch[:, :] img_in[0, :, :, 2] = img_1ch[:, :] label_p_pred = model_num_classifier.predict(img_in) num_col = np.argmax(label_p_pred[0]) + 1 self.logger.info("Found %s columns (%s)", num_col, label_p_pred) session_col_classifier.close() K.clear_session() img_new, _ = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) if img_new.shape[1] > img.shape[1]: img_new = self.predict_enhancement(img_new) is_image_enhanced = True return img, img_new, is_image_enhanced def resize_and_enhance_image_with_column_classifier(self): self.logger.debug("enter resize_and_enhance_image_with_column_classifier") dpi = check_dpi(self.image_filename) self.logger.info("Detected %s DPI", dpi) img = self.imread() _, page_coord = self.early_page_for_num_of_column_classification() model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) img_1ch = self.imread(grayscale=True) width_early = img_1ch.shape[1] img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] # plt.imshow(img_1ch) # plt.show() img_1ch = img_1ch / 255.0 img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) img_in[0, :, :, 0] = img_1ch[:, :] img_in[0, :, :, 1] = img_1ch[:, :] img_in[0, :, :, 2] = img_1ch[:, :] # plt.imshow(img_in[0,:,:,:]) # plt.show() label_p_pred = model_num_classifier.predict(img_in) num_col = np.argmax(label_p_pred[0]) + 1 self.logger.info("Found %s columns (%s)", num_col, label_p_pred) session_col_classifier.close() K.clear_session() if dpi < DPI_THRESHOLD: img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) image_res = self.predict_enhancement(img_new) is_image_enhanced = True else: is_image_enhanced = False num_column_is_classified = True image_res = np.copy(img) self.logger.debug("exit resize_and_enhance_image_with_column_classifier") return is_image_enhanced, img, image_res, num_col, num_column_is_classified def get_image_and_scales(self, img_org, img_res, scale): self.logger.debug("enter get_image_and_scales") self.image = np.copy(img_res) self.image_org = np.copy(img_org) self.height_org = self.image.shape[0] self.width_org = self.image.shape[1] self.img_hight_int = int(self.image.shape[0] * scale) self.img_width_int = int(self.image.shape[1] * scale) self.scale_y = self.img_hight_int / float(self.image.shape[0]) self.scale_x = self.img_width_int / float(self.image.shape[1]) self.image = resize_image(self.image, self.img_hight_int, self.img_width_int) # Also set for the plotter # XXX TODO hacky if self.plotter: self.plotter.image_org = self.image_org self.plotter.scale_y = self.scale_y self.plotter.scale_x = self.scale_x def get_image_and_scales_after_enhancing(self, img_org, img_res): self.logger.debug("enter get_image_and_scales_after_enhancing") self.image = np.copy(img_res) self.image = self.image.astype(np.uint8) self.image_org = np.copy(img_org) self.height_org = self.image_org.shape[0] self.width_org = self.image_org.shape[1] self.scale_y = img_res.shape[0] / float(self.image_org.shape[0]) self.scale_x = img_res.shape[1] / float(self.image_org.shape[1]) def start_new_session_and_model(self, model_dir): self.logger.debug("enter start_new_session_and_model (model_dir=%s)", model_dir) config = tf.ConfigProto() config.gpu_options.allow_growth = True session = tf.InteractiveSession() model = load_model(model_dir, compile=False) return model, session def do_prediction(self, patches, img, model, marginal_of_patch_percent=0.1): self.logger.debug("enter do_prediction") img_height_model = model.layers[len(model.layers) - 1].output_shape[1] img_width_model = model.layers[len(model.layers) - 1].output_shape[2] if not patches: img_h_page = img.shape[0] img_w_page = img.shape[1] img = img / float(255.0) img = resize_image(img, img_height_model, img_width_model) label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) seg = np.argmax(label_p_pred, axis=3)[0] seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) else: if img.shape[0] < img_height_model: img = resize_image(img, img_height_model, img.shape[1]) if img.shape[1] < img_width_model: img = resize_image(img, img.shape[0], img_width_model) self.logger.info("Image dimensions: %sx%s", img_height_model, img_width_model) margin = int(marginal_of_patch_percent * img_height_model) width_mid = img_width_model - 2 * margin height_mid = img_height_model - 2 * margin img = img / float(255.0) img = img.astype(np.float16) img_h = img.shape[0] img_w = img.shape[1] prediction_true = np.zeros((img_h, img_w, 3)) mask_true = np.zeros((img_h, img_w)) nxf = img_w / float(width_mid) nyf = img_h / float(height_mid) nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) for i in range(nxf): for j in range(nyf): if i == 0: index_x_d = i * width_mid index_x_u = index_x_d + img_width_model else: index_x_d = i * width_mid index_x_u = index_x_d + img_width_model if j == 0: index_y_d = j * height_mid index_y_u = index_y_d + img_height_model else: index_y_d = j * height_mid index_y_u = index_y_d + img_height_model if index_x_u > img_w: index_x_u = img_w index_x_d = img_w - img_width_model if index_y_u > img_h: index_y_u = img_h index_y_d = img_h - img_height_model img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) seg = np.argmax(label_p_pred, axis=3)[0] seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) if i == 0 and j == 0: seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color elif i == nxf - 1 and j == nyf - 1: seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color elif i == 0 and j == nyf - 1: seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color elif i == nxf - 1 and j == 0: seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color elif i == 0 and j != 0 and j != nyf - 1: seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color elif i == nxf - 1 and j != 0 and j != nyf - 1: seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color elif i != 0 and i != nxf - 1 and j == 0: seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color elif i != 0 and i != nxf - 1 and j == nyf - 1: seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color else: seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color prediction_true = prediction_true.astype(np.uint8) return prediction_true def early_page_for_num_of_column_classification(self): self.logger.debug("enter early_page_for_num_of_column_classification") img = self.imread() model_page, session_page = self.start_new_session_and_model(self.model_page_dir) img = cv2.GaussianBlur(img, (5, 5), 0) img_page_prediction = self.do_prediction(False, img, model_page) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) thresh = cv2.dilate(thresh, KERNEL, iterations=3) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) cnt = contours[np.argmax(cnt_size)] x, y, w, h = cv2.boundingRect(cnt) box = [x, y, w, h] croped_page, page_coord = crop_image_inside_box(box, img) session_page.close() self.logger.debug("exit early_page_for_num_of_column_classification") return croped_page, page_coord def extract_page(self): self.logger.debug("enter extract_page") model_page, session_page = self.start_new_session_and_model(self.model_page_dir) img = cv2.GaussianBlur(self.image, (5, 5), 0) img_page_prediction = self.do_prediction(False, img, model_page) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) thresh = cv2.dilate(thresh, KERNEL, iterations=3) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) cnt = contours[np.argmax(cnt_size)] x, y, w, h = cv2.boundingRect(cnt) if x <= 30: w += x x = 0 if (self.image.shape[1] - (x + w)) <= 30: w = w + (self.image.shape[1] - (x + w)) if y <= 30: h = h + y y = 0 if (self.image.shape[0] - (y + h)) <= 30: h = h + (self.image.shape[0] - (y + h)) box = [x, y, w, h] croped_page, page_coord = crop_image_inside_box(box, self.image) self.cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) session_page.close() K.clear_session() self.logger.debug("exit extract_page") return croped_page, page_coord def extract_text_regions(self, img, patches, cols): self.logger.debug("enter extract_text_regions") img_height_h = img.shape[0] img_width_h = img.shape[1] model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully if patches else self.model_region_dir_fully_np) if not patches: img = otsu_copy_binary(img) img = img.astype(np.uint8) prediction_regions2 = None else: if cols == 1: img2 = otsu_copy_binary(img) img2 = img2.astype(np.uint8) img2 = resize_image(img2, int(img_height_h * 0.7), int(img_width_h * 0.7)) marginal_of_patch_percent = 0.1 prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent) prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) if cols == 2: img2 = otsu_copy_binary(img) img2 = img2.astype(np.uint8) img2 = resize_image(img2, int(img_height_h * 0.4), int(img_width_h * 0.4)) marginal_of_patch_percent = 0.1 prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent) prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) elif cols > 2: img2 = otsu_copy_binary(img) img2 = img2.astype(np.uint8) img2 = resize_image(img2, int(img_height_h * 0.3), int(img_width_h * 0.3)) marginal_of_patch_percent = 0.1 prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent) prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) if cols == 2: img = otsu_copy_binary(img) img = img.astype(np.uint8) if img_width_h >= 2000: img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) img = img.astype(np.uint8) if cols == 1: img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 0.5), int(img_width_h * 0.5)) img = img.astype(np.uint8) if cols == 3: if (self.scale_x == 1 and img_width_h > 3000) or (self.scale_x != 1 and img_width_h > 2800): img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 2800 / float(img_width_h)), 2800) else: img = otsu_copy_binary(img) img = img.astype(np.uint8) if cols == 4: if (self.scale_x == 1 and img_width_h > 4000) or (self.scale_x != 1 and img_width_h > 3700): img = otsu_copy_binary(img) img = img.astype(np.uint8) img= resize_image(img, int(img_height_h * 3700 / float(img_width_h)), 3700) else: img = otsu_copy_binary(img)#self.otsu_copy(img) img = img.astype(np.uint8) img= resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) if cols == 5: if self.scale_x == 1 and img_width_h > 5000: img = otsu_copy_binary(img) img = img.astype(np.uint8) img= resize_image(img, int(img_height_h * 0.7), int(img_width_h * 0.7)) else: img = otsu_copy_binary(img) img = img.astype(np.uint8) img= resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9) ) if cols >= 6: if img_width_h > 5600: img = otsu_copy_binary(img) img = img.astype(np.uint8) img= resize_image(img, int(img_height_h * 5600 / float(img_width_h)), 5600) else: img = otsu_copy_binary(img) img = img.astype(np.uint8) img= resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) marginal_of_patch_percent = 0.1 prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) session_region.close() self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): self.logger.debug("enter get_slopes_and_deskew_new") num_cores = cpu_count() queue_of_all_params = Queue() processes = [] nh = np.linspace(0, len(boxes), num_cores + 1) indexes_by_text_con = np.array(range(len(contours_par))) for i in range(num_cores): boxes_per_process = boxes[int(nh[i]) : int(nh[i + 1])] contours_per_process = contours[int(nh[i]) : int(nh[i + 1])] contours_par_per_process = contours_par[int(nh[i]) : int(nh[i + 1])] indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] processes.append(Process(target=self.do_work_of_slopes_new, args=(queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, contours_par_per_process, indexes_text_con_per_process, image_page_rotated, slope_deskew))) for i in range(num_cores): processes[i].start() slopes = [] all_found_texline_polygons = [] all_found_text_regions = [] all_found_text_regions_par = [] boxes = [] all_box_coord = [] all_index_text_con = [] for i in range(num_cores): list_all_par = queue_of_all_params.get(True) slopes_for_sub_process = list_all_par[0] polys_for_sub_process = list_all_par[1] boxes_for_sub_process = list_all_par[2] contours_for_subprocess = list_all_par[3] contours_par_for_subprocess = list_all_par[4] boxes_coord_for_subprocess = list_all_par[5] indexes_for_subprocess = list_all_par[6] for j in range(len(slopes_for_sub_process)): slopes.append(slopes_for_sub_process[j]) all_found_texline_polygons.append(polys_for_sub_process[j]) boxes.append(boxes_for_sub_process[j]) all_found_text_regions.append(contours_for_subprocess[j]) all_found_text_regions_par.append(contours_par_for_subprocess[j]) all_box_coord.append(boxes_coord_for_subprocess[j]) all_index_text_con.append(indexes_for_subprocess[j]) for i in range(num_cores): processes[i].join() self.logger.debug('slopes %s', slopes) self.logger.debug("exit get_slopes_and_deskew_new") return slopes, all_found_texline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew): self.logger.debug("enter get_slopes_and_deskew_new_curved") num_cores = cpu_count() queue_of_all_params = Queue() processes = [] nh = np.linspace(0, len(boxes), num_cores + 1) indexes_by_text_con = np.array(range(len(contours_par))) for i in range(num_cores): boxes_per_process = boxes[int(nh[i]) : int(nh[i + 1])] contours_per_process = contours[int(nh[i]) : int(nh[i + 1])] contours_par_per_process = contours_par[int(nh[i]) : int(nh[i + 1])] indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] processes.append(Process(target=self.do_work_of_slopes_new_curved, args=(queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, contours_par_per_process, image_page_rotated, mask_texts_only, num_col, scale_par, indexes_text_con_per_process, slope_deskew))) for i in range(num_cores): processes[i].start() slopes = [] all_found_texline_polygons = [] all_found_text_regions = [] all_found_text_regions_par = [] boxes = [] all_box_coord = [] all_index_text_con = [] for i in range(num_cores): list_all_par = queue_of_all_params.get(True) polys_for_sub_process = list_all_par[0] boxes_for_sub_process = list_all_par[1] contours_for_subprocess = list_all_par[2] contours_par_for_subprocess = list_all_par[3] boxes_coord_for_subprocess = list_all_par[4] indexes_for_subprocess = list_all_par[5] slopes_for_sub_process = list_all_par[6] for j in range(len(polys_for_sub_process)): slopes.append(slopes_for_sub_process[j]) all_found_texline_polygons.append(polys_for_sub_process[j]) boxes.append(boxes_for_sub_process[j]) all_found_text_regions.append(contours_for_subprocess[j]) all_found_text_regions_par.append(contours_par_for_subprocess[j]) all_box_coord.append(boxes_coord_for_subprocess[j]) all_index_text_con.append(indexes_for_subprocess[j]) for i in range(num_cores): processes[i].join() # print(slopes,'slopes') return all_found_texline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con, slopes def do_work_of_slopes_new_curved(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, image_page_rotated, mask_texts_only, num_col, scale_par, indexes_r_con_per_pro, slope_deskew): self.logger.debug("enter do_work_of_slopes_new_curved") slopes_per_each_subprocess = [] bounding_box_of_textregion_per_each_subprocess = [] textlines_rectangles_per_each_subprocess = [] contours_textregion_per_each_subprocess = [] contours_textregion_par_per_each_subprocess = [] all_box_coord_per_process = [] index_by_text_region_contours = [] textline_cnt_seperated = np.zeros(textline_mask_tot_ea.shape) for mv in range(len(boxes_text)): all_text_region_raw = textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] all_text_region_raw = all_text_region_raw.astype(np.uint8) img_int_p = all_text_region_raw[:, :] # img_int_p=cv2.erode(img_int_p,KERNEL,iterations = 2) # plt.imshow(img_int_p) # plt.show() if img_int_p.shape[0] / img_int_p.shape[1] < 0.1: slopes_per_each_subprocess.append(0) slope_for_all = [slope_deskew][0] else: try: textline_con, hierachy = return_contours_of_image(img_int_p) textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierachy, max_area=1, min_area=0.0008) y_diff_mean = find_contours_mean_y_diff(textline_con_fil) sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) img_int_p[img_int_p > 0] = 1 slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=self.plotter) if abs(slope_for_all) < 0.5: slope_for_all = [slope_deskew][0] # old method # slope_for_all=self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[mv],denoised,contours[mv]) # text_patch_processed=textline_contours_postprocessing(gada) except Exception as why: self.logger.error(why) slope_for_all = MAX_SLOPE if slope_for_all == MAX_SLOPE: slope_for_all = [slope_deskew][0] slopes_per_each_subprocess.append(slope_for_all) index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) crop_img, crop_coor = crop_image_inside_box(boxes_text[mv], image_page_rotated) if abs(slope_for_all) < 45: # all_box_coord.append(crop_coor) textline_region_in_image = np.zeros(textline_mask_tot_ea.shape) cnt_o_t_max = contours_par_per_process[mv] x, y, w, h = cv2.boundingRect(cnt_o_t_max) mask_biggest = np.zeros(mask_texts_only.shape) mask_biggest = cv2.fillPoly(mask_biggest, pts=[cnt_o_t_max], color=(1, 1, 1)) mask_region_in_patch_region = mask_biggest[y : y + h, x : x + w] textline_biggest_region = mask_biggest * textline_mask_tot_ea # print(slope_for_all,'slope_for_all') textline_rotated_seperated = seperate_lines_new2(textline_biggest_region[y : y + h, x : x + w], 0, num_col, slope_for_all, plotter=self.plotter) # new line added ##print(np.shape(textline_rotated_seperated),np.shape(mask_biggest)) textline_rotated_seperated[mask_region_in_patch_region[:, :] != 1] = 0 # till here textline_cnt_seperated[y : y + h, x : x + w] = textline_rotated_seperated textline_region_in_image[y : y + h, x : x + w] = textline_rotated_seperated # plt.imshow(textline_region_in_image) # plt.show() # plt.imshow(textline_cnt_seperated) # plt.show() pixel_img = 1 cnt_textlines_in_image = return_contours_of_interested_textline(textline_region_in_image, pixel_img) textlines_cnt_per_region = [] for jjjj in range(len(cnt_textlines_in_image)): mask_biggest2 = np.zeros(mask_texts_only.shape) mask_biggest2 = cv2.fillPoly(mask_biggest2, pts=[cnt_textlines_in_image[jjjj]], color=(1, 1, 1)) if num_col + 1 == 1: mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=5) else: mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4) pixel_img = 1 mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par)) cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img) try: textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0]) except Exception as why: self.logger.error(why) else: add_boxes_coor_into_textlines = True textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv], add_boxes_coor_into_textlines) add_boxes_coor_into_textlines = False # print(np.shape(textlines_cnt_per_region),'textlines_cnt_per_region') textlines_rectangles_per_each_subprocess.append(textlines_cnt_per_region) bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv]) contours_textregion_per_each_subprocess.append(contours_per_process[mv]) contours_textregion_par_per_each_subprocess.append(contours_par_per_process[mv]) all_box_coord_per_process.append(crop_coor) queue_of_all_params.put([textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours, slopes_per_each_subprocess]) def do_work_of_slopes_new(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew): self.logger.debug('enter do_work_of_slopes_new') slopes_per_each_subprocess = [] bounding_box_of_textregion_per_each_subprocess = [] textlines_rectangles_per_each_subprocess = [] contours_textregion_per_each_subprocess = [] contours_textregion_par_per_each_subprocess = [] all_box_coord_per_process = [] index_by_text_region_contours = [] for mv in range(len(boxes_text)): crop_img,crop_coor=crop_image_inside_box(boxes_text[mv],image_page_rotated) mask_textline=np.zeros((textline_mask_tot_ea.shape)) mask_textline=cv2.fillPoly(mask_textline,pts=[contours_per_process[mv]],color=(1,1,1)) denoised=None all_text_region_raw=(textline_mask_tot_ea*mask_textline[:,:])[boxes_text[mv][1]:boxes_text[mv][1]+boxes_text[mv][3] , boxes_text[mv][0]:boxes_text[mv][0]+boxes_text[mv][2] ] all_text_region_raw=all_text_region_raw.astype(np.uint8) img_int_p=all_text_region_raw[:,:]#self.all_text_region_raw[mv] img_int_p=cv2.erode(img_int_p,KERNEL,iterations = 2) if img_int_p.shape[0]/img_int_p.shape[1]<0.1: slopes_per_each_subprocess.append(0) slope_for_all = [slope_deskew][0] all_text_region_raw = textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv], 0) textlines_rectangles_per_each_subprocess.append(cnt_clean_rot) index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv]) else: try: textline_con, hierachy = return_contours_of_image(img_int_p) textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierachy, max_area=1, min_area=0.00008) y_diff_mean = find_contours_mean_y_diff(textline_con_fil) sigma_des = int(y_diff_mean * (4.0 / 40.0)) if sigma_des < 1: sigma_des = 1 img_int_p[img_int_p > 0] = 1 slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=self.plotter) if abs(slope_for_all) <= 0.5: slope_for_all = [slope_deskew][0] except Exception as why: self.logger.error(why) slope_for_all = MAX_SLOPE if slope_for_all == MAX_SLOPE: slope_for_all = [slope_deskew][0] slopes_per_each_subprocess.append(slope_for_all) mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) # plt.imshow(mask_only_con_region) # plt.show() all_text_region_raw = np.copy(textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]) mask_only_con_region = mask_only_con_region[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] ##plt.imshow(textline_mask_tot_ea) ##plt.show() ##plt.imshow(all_text_region_raw) ##plt.show() ##plt.imshow(mask_only_con_region) ##plt.show() all_text_region_raw[mask_only_con_region == 0] = 0 cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv]) textlines_rectangles_per_each_subprocess.append(cnt_clean_rot) index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv]) contours_textregion_per_each_subprocess.append(contours_per_process[mv]) contours_textregion_par_per_each_subprocess.append(contours_par_per_process[mv]) all_box_coord_per_process.append(crop_coor) queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours]) def textline_contours(self, img, patches, scaler_h, scaler_w): self.logger.debug('enter textline_contours') model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir if patches else self.model_textline_dir_np) img = img.astype(np.uint8) img_org = np.copy(img) img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) prediction_textline = self.do_prediction(patches, img, model_textline) prediction_textline = resize_image(prediction_textline, img_h, img_w) prediction_textline_longshot = self.do_prediction(False, img, model_textline) prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) ##plt.imshow(prediction_textline_streched[:,:,0]) ##plt.show() session_textline.close() return prediction_textline[:, :, 0], prediction_textline_longshot_true_size[:, :, 0] def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process): self.logger.debug('enter do_work_of_slopes') slope_biggest = 0 slopes_sub = [] boxes_sub_new = [] poly_sub = [] for mv in range(len(boxes_per_process)): crop_img, _ = crop_image_inside_box(boxes_per_process[mv], np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) crop_img = crop_img[:, :, 0] crop_img = cv2.erode(crop_img, KERNEL, iterations=2) try: textline_con, hierachy = return_contours_of_image(crop_img) textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierachy, max_area=1, min_area=0.0008) y_diff_mean = find_contours_mean_y_diff(textline_con_fil) sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) crop_img[crop_img > 0] = 1 slope_corresponding_textregion = return_deskew_slop(crop_img, sigma_des, plotter=self.plotter) except Exception as why: self.logger.error(why) slope_corresponding_textregion = MAX_SLOPE if slope_corresponding_textregion == MAX_SLOPE: slope_corresponding_textregion = slope_biggest slopes_sub.append(slope_corresponding_textregion) cnt_clean_rot = textline_contours_postprocessing(crop_img, slope_corresponding_textregion, contours_per_process[mv], boxes_per_process[mv]) poly_sub.append(cnt_clean_rot) boxes_sub_new.append(boxes_per_process[mv]) q.put(slopes_sub) poly.put(poly_sub) box_sub.put(boxes_sub_new) def calculate_polygon_coords(self, contour_list, i, page_coord): self.logger.debug('enter calculate_polygon_coords') coords = '' for j in range(len(contour_list[i])): if len(contour_list[i][j]) == 2: coords += str(int((contour_list[i][j][0] + page_coord[2]) / self.scale_x)) coords += ',' coords += str(int((contour_list[i][j][1] + page_coord[0]) / self.scale_y)) else: coords += str(int((contour_list[i][j][0][0] + page_coord[2]) / self.scale_x)) coords += ',' coords += str(int((contour_list[i][j][0][1] + page_coord[0]) / self.scale_y)) if j < len(contour_list[i]) - 1: coords=coords + ' ' #print(coords) return coords def calculate_page_coords(self): self.logger.debug('enter calculate_page_coords') points_page_print = "" for lmm, contour in enumerate(self.cont_page[0]): if len(contour) == 2: points_page_print += str(int((contour[0]) / self.scale_x)) points_page_print += ',' points_page_print += str(int((contour[1]) / self.scale_y)) else: points_page_print += str(int((contour[0][0]) / self.scale_x)) points_page_print += ',' points_page_print += str(int((contour[0][1] ) / self.scale_y)) points_page_print = points_page_print + ' ' return points_page_print[:-1] def xml_reading_order(self, page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals): """ XXX side-effect: extends id_of_marginalia """ region_order = ET.SubElement(page, 'ReadingOrder') region_order_sub = ET.SubElement(region_order, 'OrderedGroup') region_order_sub.set('id', "ro357564684568544579089") indexer_region = 0 for vj in order_of_texts: name = "coord_text_%s" % vj name = ET.SubElement(region_order_sub, 'RegionRefIndexed') name.set('index', str(indexer_region)) name.set('regionRef', id_of_texts[vj]) indexer_region += 1 for vm in range(len(found_polygons_marginals)): id_of_marginalia.append('r%s' % indexer_region) name = "coord_text_%s" % indexer_region name = ET.SubElement(region_order_sub, 'RegionRefIndexed') name.set('index', str(indexer_region)) name.set('regionRef', 'r%s' % indexer_region) indexer_region += 1 def serialize_lines_in_marginal(self, marginal, all_found_texline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, id_indexer_l): for j in range(len(all_found_texline_polygons_marginals[marginal_idx])): textline = ET.SubElement(marginal, 'TextLine') textline.set('id', 'l%s' % id_indexer_l) id_indexer_l += 1 coord = ET.SubElement(textline, 'Coords') add_textequiv(textline) points_co = '' for l in range(len(all_found_texline_polygons_marginals[marginal_idx][j])): if not self.curved_line: if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2: points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) points_co += ',' points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y)) else: points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) points_co += ',' points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0])/self.scale_y)) else: if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2: points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x)) points_co += ',' points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + page_coord[0]) / self.scale_y)) else: points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + page_coord[2]) / self.scale_x)) points_co += ',' points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + page_coord[0]) / self.scale_y)) if l < len(all_found_texline_polygons_marginals[marginal_idx][j]) - 1: points_co += ' ' coord.set('points',points_co) return id_indexer_l def serialize_lines_in_region(self, textregion, all_found_texline_polygons, region_idx, page_coord, all_box_coord, slopes, id_indexer_l): self.logger.debug('enter serialize_lines_in_region') for j in range(len(all_found_texline_polygons[region_idx])): textline = ET.SubElement(textregion, 'TextLine') textline.set('id', 'l%s' % id_indexer_l) id_indexer_l += 1 coord = ET.SubElement(textline, 'Coords') add_textequiv(textline) points_co = '' for l in range(len(all_found_texline_polygons[region_idx][j])): if not self.curved_line: if len(all_found_texline_polygons[region_idx][j][l])==2: textline_x_coord = max(0, int((all_found_texline_polygons[region_idx][j][l][0] + all_box_coord[region_idx][2] + page_coord[2]) / self.scale_x)) textline_y_coord = max(0, int((all_found_texline_polygons[region_idx][j][l][1] + all_box_coord[region_idx][0] + page_coord[0]) / self.scale_y)) else: textline_x_coord = max(0, int((all_found_texline_polygons[region_idx][j][l][0][0] + all_box_coord[region_idx][2] + page_coord[2]) / self.scale_x)) textline_y_coord = max(0, int((all_found_texline_polygons[region_idx][j][l][0][1] + all_box_coord[region_idx][0] + page_coord[0]) / self.scale_y)) points_co += str(textline_x_coord) points_co += ',' points_co += str(textline_y_coord) if self.curved_line and np.abs(slopes[region_idx]) <= 45: if len(all_found_texline_polygons[region_idx][j][l]) == 2: points_co += str(int((all_found_texline_polygons[region_idx][j][l][0] + page_coord[2]) / self.scale_x)) points_co += ',' points_co += str(int((all_found_texline_polygons[region_idx][j][l][1] + page_coord[0]) / self.scale_y)) else: points_co += str(int((all_found_texline_polygons[region_idx][j][l][0][0] + page_coord[2]) / self.scale_x)) points_co += ',' points_co += str(int((all_found_texline_polygons[region_idx][j][l][0][1] + page_coord[0])/self.scale_y)) elif self.curved_line and np.abs(slopes[region_idx]) > 45: if len(all_found_texline_polygons[region_idx][j][l])==2: points_co += str(int((all_found_texline_polygons[region_idx][j][l][0] + all_box_coord[region_idx][2]+page_coord[2])/self.scale_x)) points_co += ',' points_co += str(int((all_found_texline_polygons[region_idx][j][l][1] + all_box_coord[region_idx][0]+page_coord[0])/self.scale_y)) else: points_co += str(int((all_found_texline_polygons[region_idx][j][l][0][0] + all_box_coord[region_idx][2]+page_coord[2])/self.scale_x)) points_co += ',' points_co += str(int((all_found_texline_polygons[region_idx][j][l][0][1] + all_box_coord[region_idx][0]+page_coord[0])/self.scale_y)) if l < len(all_found_texline_polygons[region_idx][j]) - 1: points_co += ' ' coord.set('points',points_co) return id_indexer_l def write_into_page_xml(self, pcgts): self.logger.info("filename stem: '%s'", self.image_filename_stem) tree = ET.ElementTree(pcgts) tree.write(os.path.join(self.dir_out, self.image_filename_stem) + ".xml") def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes): self.logger.debug('enter build_pagexml_no_full_layout') # create the file structure pcgts, page = create_page_xml(self.image_filename, self.height_org, self.width_org) page_print_sub = ET.SubElement(page, "Border") coord_page = ET.SubElement(page_print_sub, "Coords") coord_page.set('points', self.calculate_page_coords()) id_of_marginalia = [] id_indexer = 0 id_indexer_l = 0 if len(found_polygons_text_region) > 0: self.xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals) for mm in range(len(found_polygons_text_region)): textregion = ET.SubElement(page, 'TextRegion') textregion.set('id', 'r%s' % id_indexer) id_indexer += 1 textregion.set('type', 'paragraph') coord_text = ET.SubElement(textregion, 'Coords') coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord)) id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, id_indexer_l) add_textequiv(textregion) for marginal_idx in range(len(found_polygons_marginals)): marginal = ET.SubElement(page, 'TextRegion') marginal.set('id', id_of_marginalia[mm]) marginal.set('type', 'marginalia') coord_text = ET.SubElement(marginal, 'Coords') coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals, mm, page_coord)) self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, id_indexer_l) id_indexer = len(found_polygons_text_region) + len(found_polygons_marginals) for mm in range(len(found_polygons_text_region_img)): textregion = ET.SubElement(page, 'ImageRegion') textregion.set('id', 'r%s' % id_indexer) id_indexer += 1 coord_text = ET.SubElement(textregion, 'Coords') points_co = '' for lmm in range(len(found_polygons_text_region_img[mm])): points_co += str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x)) points_co += ',' points_co += str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y)) if lmm < len(found_polygons_text_region_img[mm]) - 1: points_co += ' ' coord_text.set('points', points_co) return pcgts def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_found_texline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes): self.logger.debug('enter build_pagexml_full_layout') # create the file structure pcgts, page = create_page_xml(self.image_filename, self.height_org, self.width_org) page_print_sub = ET.SubElement(page, "Border") coord_page = ET.SubElement(page_print_sub, "Coords") coord_page.set('points', self.calculate_page_coords()) id_indexer = 0 id_indexer_l = 0 id_of_marginalia = [] if len(found_polygons_text_region) > 0: self.xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals) for mm in range(len(found_polygons_text_region)): textregion=ET.SubElement(page, 'TextRegion') textregion.set('id', 'r%s' % id_indexer) id_indexer += 1 textregion.set('type', 'paragraph') coord_text = ET.SubElement(textregion, 'Coords') coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord)) id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, id_indexer_l) add_textequiv(textregion) self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) if len(found_polygons_text_region_h) > 0: for mm in range(len(found_polygons_text_region_h)): textregion=ET.SubElement(page, 'TextRegion') textregion.set('id', 'r%s' % id_indexer) id_indexer += 1 textregion.set('type','header') coord_text = ET.SubElement(textregion, 'Coords') coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_h, mm, page_coord)) id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord_h, slopes, id_indexer_l) add_textequiv(textregion) if len(found_polygons_drop_capitals) > 0: id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals) for mm in range(len(found_polygons_drop_capitals)): textregion=ET.SubElement(page, 'TextRegion') textregion.set('id',' r%s' % id_indexer) id_indexer += 1 textregion.set('type', 'drop-capital') coord_text = ET.SubElement(textregion, 'Coords') coord_text.set('points', self.calculate_polygon_coords(found_polygons_drop_capitals, mm, page_coord)) add_textequiv(textregion) for marginal_idx in range(len(found_polygons_marginals)): marginal = ET.SubElement(page, 'TextRegion') add_textequiv(textregion) marginal.set('id', id_of_marginalia[mm]) marginal.set('type', 'marginalia') coord_text = ET.SubElement(marginal, 'Coords') coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals, mm, page_coord)) self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, id_indexer_l) id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals) + len(found_polygons_drop_capitals) for mm in range(len(found_polygons_text_region_img)): textregion=ET.SubElement(page, 'ImageRegion') textregion.set('id', 'r%s' % id_indexer) id_indexer += 1 coord_text = ET.SubElement(textregion, 'Coords') coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_img, mm, page_coord)) for mm in range(len(found_polygons_tables)): textregion = ET.SubElement(page, 'TableRegion') textregion.set('id', 'r%s' %id_indexer) id_indexer += 1 coord_text = ET.SubElement(textregion, 'Coords') coord_text.set('points', self.calculate_polygon_coords(found_polygons_tables, mm, page_coord)) return pcgts def get_regions_from_xy_2models(self,img,is_image_enhanced): self.logger.debug("enter get_regions_from_xy_2models") img_org = np.copy(img) img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) ratio_y=1.3 ratio_x=1 img = resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) prediction_regions_org_y = self.do_prediction(True, img, model_region) prediction_regions_org_y = resize_image(prediction_regions_org_y, img_height_h, img_width_h ) #plt.imshow(prediction_regions_org_y[:,:,0]) #plt.show() prediction_regions_org_y = prediction_regions_org_y[:,:,0] mask_zeros_y = (prediction_regions_org_y[:,:]==0)*1 img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1))) prediction_regions_org = self.do_prediction(True, img, model_region) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) ##plt.imshow(prediction_regions_org[:,:,0]) ##plt.show() prediction_regions_org=prediction_regions_org[:,:,0] prediction_regions_org[(prediction_regions_org[:,:]==1) & (mask_zeros_y[:,:]==1)]=0 session_region.close() model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p2) img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1])) prediction_regions_org2 = self.do_prediction(True, img, model_region, 0.2) prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) #plt.imshow(prediction_regions_org2[:,:,0]) #plt.show() ##prediction_regions_org=prediction_regions_org[:,:,0] session_region.close() mask_zeros2 = (prediction_regions_org2[:,:,0] == 0) mask_lines2 = (prediction_regions_org2[:,:,0] == 3) text_sume_early = (prediction_regions_org[:,:] == 1).sum() prediction_regions_org_copy = np.copy(prediction_regions_org) prediction_regions_org_copy[(prediction_regions_org_copy[:,:]==1) & (mask_zeros2[:,:]==1)] = 0 text_sume_second = ((prediction_regions_org_copy[:,:]==1)*1).sum() rate_two_models = text_sume_second / float(text_sume_early) * 100 self.logger.info("ratio_of_two_models: %s", rate_two_models) if not(is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD): prediction_regions_org = np.copy(prediction_regions_org_copy) prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3 mask_lines_only=(prediction_regions_org[:,:]==3)*1 prediction_regions_org = cv2.erode(prediction_regions_org[:,:], KERNEL, iterations=2) #plt.imshow(text_region2_1st_channel) #plt.show() prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2) mask_texts_only=(prediction_regions_org[:,:]==1)*1 mask_images_only=(prediction_regions_org[:,:]==2)*1 polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) polygons_of_only_images = return_contours_of_interested_region(mask_images_only, 1) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) text_regions_p_true = cv2.fillPoly(text_regions_p_true,pts = polygons_of_only_lines, color=(3, 3, 3)) text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1)) K.clear_session() return text_regions_p_true def do_order_of_regions_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): self.logger.debug("enter do_order_of_regions_full_layout") cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contoures(contours_only_text_parent) cx_text_only_h, cy_text_only_h, x_min_text_only_h, _, _, _, y_cor_x_min_main_h = find_new_features_of_contoures(contours_only_text_parent_h) try: arg_text_con = [] for ii in range(len(cx_text_only)): for jj in range(len(boxes)): if (x_min_text_only[ii] + 80) >= boxes[jj][0] and (x_min_text_only[ii] + 80) < boxes[jj][1] and y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]: arg_text_con.append(jj) break args_contours = np.array(range(len(arg_text_con))) arg_text_con_h = [] for ii in range(len(cx_text_only_h)): for jj in range(len(boxes)): if (x_min_text_only_h[ii] + 80) >= boxes[jj][0] and (x_min_text_only_h[ii] + 80) < boxes[jj][1] and y_cor_x_min_main_h[ii] >= boxes[jj][2] and y_cor_x_min_main_h[ii] < boxes[jj][3]: arg_text_con_h.append(jj) break args_contours_h = np.array(range(len(arg_text_con_h))) order_by_con_head = np.zeros(len(arg_text_con_h)) order_by_con_main = np.zeros(len(arg_text_con)) ref_point = 0 order_of_texts_tot = [] id_of_texts_tot = [] for iij in range(len(boxes)): args_contours_box = args_contours[np.array(arg_text_con) == iij] args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij] con_inter_box = [] con_inter_box_h = [] for i in range(len(args_contours_box)): con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) for i in range(len(args_contours_box_h)): con_inter_box_h.append(contours_only_text_parent_h[args_contours_box_h[i]]) indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, con_inter_box_h, boxes[iij][2]) order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_sorted_head = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 2] indexes_by_type_head = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 2] for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] tartib = np.where(indexes_sorted == arg_order_v)[0][0] order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = tartib + ref_point for zahler, _ in enumerate(args_contours_box_h): arg_order_v = indexes_sorted_head[zahler] tartib = np.where(indexes_sorted == arg_order_v)[0][0] order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = tartib + ref_point for jji in range(len(id_of_texts)): order_of_texts_tot.append(order_of_texts[jji] + ref_point) id_of_texts_tot.append(id_of_texts[jji]) ref_point = ref_point + len(id_of_texts) order_of_texts_tot = [] for tj1 in range(len(contours_only_text_parent)): order_of_texts_tot.append(int(order_by_con_main[tj1])) for tj1 in range(len(contours_only_text_parent_h)): order_of_texts_tot.append(int(order_by_con_head[tj1])) order_text_new = [] for iii in range(len(order_of_texts_tot)): tartib_new = np.where(np.array(order_of_texts_tot) == iii)[0][0] order_text_new.append(tartib_new) except Exception as why: self.logger.error(why) arg_text_con = [] for ii in range(len(cx_text_only)): for jj in range(len(boxes)): if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) break args_contours = np.array(range(len(arg_text_con))) order_by_con_main = np.zeros(len(arg_text_con)) ############################# head arg_text_con_h = [] for ii in range(len(cx_text_only_h)): for jj in range(len(boxes)): if cx_text_only_h[ii] >= boxes[jj][0] and cx_text_only_h[ii] < boxes[jj][1] and cy_text_only_h[ii] >= boxes[jj][2] and cy_text_only_h[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located arg_text_con_h.append(jj) break arg_arg_text_con_h = np.argsort(arg_text_con_h) args_contours_h = np.array(range(len(arg_text_con_h))) order_by_con_head = np.zeros(len(arg_text_con_h)) ref_point = 0 order_of_texts_tot = [] id_of_texts_tot = [] for iij in range(len(boxes)): args_contours_box = args_contours[np.array(arg_text_con) == iij] args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij] con_inter_box = [] con_inter_box_h = [] for box in args_contours_box: con_inter_box.append(contours_only_text_parent[box]) for box in args_contours_box_h: con_inter_box_h.append(contours_only_text_parent_h[box]) indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, con_inter_box_h, boxes[iij][2]) order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_sorted_head = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 2] indexes_by_type_head = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 2] for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] tartib = np.where(indexes_sorted == arg_order_v)[0][0] order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = tartib + ref_point for zahler, _ in enumerate(args_contours_box_h): arg_order_v = indexes_sorted_head[zahler] tartib = np.where(indexes_sorted == arg_order_v)[0][0] order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = tartib + ref_point for jji in range(len(id_of_texts)): order_of_texts_tot.append(order_of_texts[jji] + ref_point) id_of_texts_tot.append(id_of_texts[jji]) ref_point = ref_point + len(id_of_texts) order_of_texts_tot = [] for tj1 in range(len(contours_only_text_parent)): order_of_texts_tot.append(int(order_by_con_main[tj1])) for tj1 in range(len(contours_only_text_parent_h)): order_of_texts_tot.append(int(order_by_con_head[tj1])) order_text_new = [] for iii in range(len(order_of_texts_tot)): tartib_new = np.where(np.array(order_of_texts_tot) == iii)[0][0] order_text_new.append(tartib_new) return order_text_new, id_of_texts_tot def do_order_of_regions_no_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): self.logger.debug("enter do_order_of_regions_no_full_layout") cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contoures(contours_only_text_parent) try: arg_text_con = [] for ii in range(len(cx_text_only)): for jj in range(len(boxes)): if (x_min_text_only[ii] + 80) >= boxes[jj][0] and (x_min_text_only[ii] + 80) < boxes[jj][1] and y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]: arg_text_con.append(jj) break args_contours = np.array(range(len(arg_text_con))) order_by_con_main = np.zeros(len(arg_text_con)) ref_point = 0 order_of_texts_tot = [] id_of_texts_tot = [] for iij in range(len(boxes)): args_contours_box = args_contours[np.array(arg_text_con) == iij] con_inter_box = [] con_inter_box_h = [] for i in range(len(args_contours_box)): con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, con_inter_box_h, boxes[iij][2]) order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] for zahler, mtv in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] tartib = np.where(indexes_sorted == arg_order_v)[0][0] order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = tartib + ref_point for jji in range(len(id_of_texts)): order_of_texts_tot.append(order_of_texts[jji] + ref_point) id_of_texts_tot.append(id_of_texts[jji]) ref_point = ref_point + len(id_of_texts) order_of_texts_tot = [] for tj1 in range(len(contours_only_text_parent)): order_of_texts_tot.append(int(order_by_con_main[tj1])) order_text_new = [] for iii in range(len(order_of_texts_tot)): tartib_new = np.where(np.array(order_of_texts_tot) == iii)[0][0] order_text_new.append(tartib_new) except Exception as why: self.logger.error(why) arg_text_con = [] for ii in range(len(cx_text_only)): for jj in range(len(boxes)): if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) break args_contours = np.array(range(len(arg_text_con))) order_by_con_main = np.zeros(len(arg_text_con)) ref_point = 0 order_of_texts_tot = [] id_of_texts_tot = [] for iij in range(len(boxes)): args_contours_box = args_contours[np.array(arg_text_con) == iij] con_inter_box = [] con_inter_box_h = [] for i in range(len(args_contours_box)): con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, con_inter_box_h, boxes[iij][2]) order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_sorted_head = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 2] indexes_by_type_head = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 2] for zahler, mtv in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] tartib = np.where(indexes_sorted == arg_order_v)[0][0] order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = tartib + ref_point for jji in range(len(id_of_texts)): order_of_texts_tot.append(order_of_texts[jji] + ref_point) id_of_texts_tot.append(id_of_texts[jji]) ref_point = ref_point + len(id_of_texts) order_of_texts_tot = [] for tj1 in range(len(contours_only_text_parent)): order_of_texts_tot.append(int(order_by_con_main[tj1])) order_text_new = [] for iii in range(len(order_of_texts_tot)): tartib_new = np.where(np.array(order_of_texts_tot) == iii)[0][0] order_text_new.append(tartib_new) return order_text_new, id_of_texts_tot def do_order_of_regions(self, *args, **kwargs): if self.full_layout: return self.do_order_of_regions_full_layout(*args, **kwargs) return self.do_order_of_regions_no_full_layout(*args, **kwargs) def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_column_is_classified): img_g = self.imread(grayscale=True, uint8=True) img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) img_g3 = img_g3.astype(np.uint8) img_g3[:, :, 0] = img_g[:, :] img_g3[:, :, 1] = img_g[:, :] img_g3[:, :, 2] = img_g[:, :] image_page, page_coord = self.extract_page() if self.plotter: self.plotter.save_page_image(image_page) img_g3_page = img_g3[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3], :] text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] mask_images = (text_regions_p_1[:, :] == 2) * 1 mask_images = mask_images.astype(np.uint8) mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10) mask_lines = (text_regions_p_1[:, :] == 3) * 1 mask_lines = mask_lines.astype(np.uint8) img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) img_only_regions = cv2.erode(img_only_regions_with_sep[:, :], KERNEL, iterations=6) try: num_col, peaks_neg_fin = find_num_col(img_only_regions, multiplier=6.0) num_col = num_col + 1 if not num_column_is_classified: num_col_classifier = num_col + 1 except Exception as why: self.logger.error(why) num_col = None peaks_neg_fin = [] return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1 def run_enhancement(self): self.logger.info("resize and enhance image") is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified = self.resize_and_enhance_image_with_column_classifier() self.logger.info("Image is %senhanced", '' if is_image_enhanced else 'not ') K.clear_session() scale = 1 if is_image_enhanced: if self.allow_enhancement: cv2.imwrite(os.path.join(self.dir_out, self.image_filename_stem) + ".tif", img_res) img_res = img_res.astype(np.uint8) self.get_image_and_scales(img_org, img_res, scale) else: self.get_image_and_scales_after_enhancing(img_org, img_res) else: if self.allow_enhancement: self.get_image_and_scales(img_org, img_res, scale) else: self.get_image_and_scales(img_org, img_res, scale) if self.allow_scaling: img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced) self.get_image_and_scales_after_enhancing(img_org, img_res) return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified def run_textline(self, image_page): scaler_h_textline = 1 # 1.2#1.2 scaler_w_textline = 1 # 0.9#1 textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline) K.clear_session() #print(np.unique(textline_mask_tot_ea[:, :]), "textline") # plt.imshow(textline_mask_tot_ea) # plt.show() if self.plotter: self.plotter.save_plot_of_textlines(textline_mask_tot_ea, image_page) return textline_mask_tot_ea def run_deskew(self, textline_mask_tot_ea): sigma = 2 main_page_deskew = True slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, main_page_deskew, plotter=self.plotter) slope_first = 0 if self.plotter: self.plotter.save_deskewed_image(slope_deskew) self.logger.info("slope_deskew: %s", slope_deskew) return slope_deskew, slope_first def run_marginals(self, image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1): image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :] textline_mask_tot[mask_images[:, :] == 1] = 0 pixel_img = 1 min_area = 0.00001 max_area = 0.0006 textline_mask_tot_small_size = return_contours_of_interested_region_by_size(textline_mask_tot, pixel_img, min_area, max_area) text_regions_p_1[mask_lines[:, :] == 1] = 3 text_regions_p = text_regions_p_1[:, :] # long_short_region[:,:]#self.get_regions_from_2_models(image_page) text_regions_p = np.array(text_regions_p) if num_col_classifier in (1, 2): try: regions_without_seperators = (text_regions_p[:, :] == 1) * 1 regions_without_seperators = regions_without_seperators.astype(np.uint8) text_regions_p = get_marginals(rotate_image(regions_without_seperators, slope_deskew), text_regions_p, num_col_classifier, slope_deskew, kernel=KERNEL) except Exception as e: self.logger.error("exception %s", e) pass if self.plotter: self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page) self.plotter.save_plot_of_layout_main(text_regions_p, image_page) return textline_mask_tot, text_regions_p, image_page_rotated def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier): self.logger.debug('enter run_boxes_no_full_layout') if np.abs(slope_deskew) >= SLOPE_THRESHOLD: image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1]) regions_without_seperators_d = (text_regions_p_1_n[:, :] == 1) * 1 regions_without_seperators = (text_regions_p[:, :] == 1) * 1 # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_seperators_new(text_regions_p[:,:,0],img_only_regions) if np.abs(slope_deskew) < SLOPE_THRESHOLD: text_regions_p_1_n = None textline_mask_tot_d = None regions_without_seperators_d = None pixel_lines = 3 if np.abs(slope_deskew) < SLOPE_THRESHOLD: num_col, peaks_neg_fin, matrix_of_lines_ch, spliter_y_new, seperators_closeup_n = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines) if np.abs(slope_deskew) >= SLOPE_THRESHOLD: num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, spliter_y_new_d, seperators_closeup_n_d = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines) K.clear_session() self.logger.info("num_col_classifier: %s", num_col_classifier) if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: regions_without_seperators = regions_without_seperators.astype(np.uint8) regions_without_seperators = cv2.erode(regions_without_seperators[:, :], KERNEL, iterations=6) #random_pixels_for_image = np.random.randn(regions_without_seperators.shape[0], regions_without_seperators.shape[1]) #random_pixels_for_image[random_pixels_for_image < -0.5] = 0 #random_pixels_for_image[random_pixels_for_image != 0] = 1 #regions_without_seperators[(random_pixels_for_image[:, :] == 1) & (text_regions_p[:, :] == 2)] = 1 else: regions_without_seperators_d = regions_without_seperators_d.astype(np.uint8) regions_without_seperators_d = cv2.erode(regions_without_seperators_d[:, :], KERNEL, iterations=6) #random_pixels_for_image = np.random.randn(regions_without_seperators_d.shape[0], regions_without_seperators_d.shape[1]) #random_pixels_for_image[random_pixels_for_image < -0.5] = 0 #random_pixels_for_image[random_pixels_for_image != 0] = 1 #regions_without_seperators_d[(random_pixels_for_image[:, :] == 1) & (text_regions_p_1_n[:, :] == 2)] = 1 t1 = time.time() if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes = return_boxes_of_images_by_order_of_reading_new(spliter_y_new, regions_without_seperators, matrix_of_lines_ch, num_col_classifier) boxes_d = None self.logger.debug("len(boxes): %s", len(boxes)) else: boxes_d = return_boxes_of_images_by_order_of_reading_new(spliter_y_new_d, regions_without_seperators_d, matrix_of_lines_ch_d, num_col_classifier) boxes = None self.logger.debug("len(boxes): %s", len(boxes_d)) self.logger.info("detecting boxes took %ss", str(time.time() - t1)) img_revised_tab = text_regions_p[:, :] polygons_of_images = return_contours_of_interested_region(img_revised_tab, 2) # plt.imshow(img_revised_tab) # plt.show() K.clear_session() self.logger.debug('exit run_boxes_no_full_layout') return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_seperators_d, boxes, boxes_d def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions): self.logger.debug('enter run_boxes_full_layout') # set first model with second model text_regions_p[:, :][text_regions_p[:, :] == 2] = 5 text_regions_p[:, :][text_regions_p[:, :] == 3] = 6 text_regions_p[:, :][text_regions_p[:, :] == 4] = 8 K.clear_session() image_page = image_page.astype(np.uint8) regions_fully, regions_fully_only_drop = self.extract_text_regions(image_page, True, cols=num_col_classifier) text_regions_p[:,:][regions_fully[:,:,0]==6]=6 regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 K.clear_session() # plt.imshow(regions_fully[:,:,0]) # plt.show() regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully) # plt.imshow(regions_fully[:,:,0]) # plt.show() K.clear_session() regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) # plt.imshow(regions_fully_np[:,:,0]) # plt.show() if num_col_classifier > 2: regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 else: regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p) # plt.imshow(regions_fully_np[:,:,0]) # plt.show() K.clear_session() # plt.imshow(regions_fully[:,:,0]) # plt.show() regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) # plt.imshow(regions_fully[:,:,0]) # plt.show() text_regions_p[:, :][regions_fully[:, :, 0] == 4] = 4 text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4 #plt.imshow(text_regions_p) #plt.show() if np.abs(slope_deskew) >= SLOPE_THRESHOLD: image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout(image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1]) regions_fully_n = resize_image(regions_fully_n, text_regions_p.shape[0], text_regions_p.shape[1]) regions_without_seperators_d = (text_regions_p_1_n[:, :] == 1) * 1 else: text_regions_p_1_n = None textline_mask_tot_d = None regions_without_seperators_d = None regions_without_seperators = (text_regions_p[:, :] == 1) * 1 # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_seperators_new(text_regions_p[:,:,0],img_only_regions) K.clear_session() img_revised_tab = np.copy(text_regions_p[:, :]) polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5) self.logger.debug('exit run_boxes_full_layout') return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_seperators_d, regions_fully, regions_without_seperators def run(self): """ Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") t1 = time.time() img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement() self.logger.info("Enhancing took %ss ", str(time.time() - t1)) t1 = time.time() text_regions_p_1 = self.get_regions_from_xy_2models(img_res, is_image_enhanced) self.logger.info("Textregion detection took %ss ", str(time.time() - t1)) t1 = time.time() num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1 = \ self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified) self.logger.info("Graphics detection took %ss ", str(time.time() - t1)) if not num_col: self.logger.info("No columns detected, outputting an empty PAGE-XML") self.write_into_page_xml(self.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [])) self.logger.info("Job done in %ss", str(time.time() - t1)) return t1 = time.time() textline_mask_tot_ea = self.run_textline(image_page) self.logger.info("textline detection took %ss", str(time.time() - t1)) t1 = time.time() slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) self.logger.info("deskewing took %ss", str(time.time() - t1)) t1 = time.time() textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1) self.logger.info("detection of marginals took %ss", str(time.time() - t1)) t1 = time.time() if not self.full_layout: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_seperators_d, boxes, boxes_d = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier) pixel_img = 4 min_area_mar = 0.00001 polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) if self.full_layout: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_seperators_d, regions_fully, regions_without_seperators = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions) # plt.imshow(img_revised_tab) # plt.show() # print(img_revised_tab.shape,text_regions_p_1_n.shape) # text_regions_p_1_n=resize_image(text_regions_p_1_n,img_revised_tab.shape[0],img_revised_tab.shape[1]) # print(np.unique(text_regions_p_1_n),'uni') text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 ##text_only_h=( (img_revised_tab[:,:,0]==2) )*1 # print(text_only.shape,text_only_d.shape) # plt.imshow(text_only) # plt.show() # plt.imshow(text_only_d) # plt.show() min_con_area = 0.000005 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text, hir_on_text = return_contours_of_image(text_only) contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) areas_cnt_text = np.array([cv2.contourArea(contours_only_text_parent[j]) for j in range(len(contours_only_text_parent))]) areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) self.logger.info('areas_cnt_text %s', areas_cnt_text) contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] contours_only_text_parent = [contours_only_text_parent[jz] for jz in range(len(contours_only_text_parent)) if areas_cnt_text[jz] > min_con_area] areas_cnt_text_parent = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > min_con_area] index_con_parents = np.argsort(areas_cnt_text_parent) contours_only_text_parent = list(np.array(contours_only_text_parent)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contoures([contours_biggest]) cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contoures(contours_only_text_parent) contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) areas_cnt_text_d = np.array([cv2.contourArea(contours_only_text_parent_d[j]) for j in range(len(contours_only_text_parent_d))]) areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] index_con_parents_d=np.argsort(areas_cnt_text_d) contours_only_text_parent_d=list(np.array(contours_only_text_parent_d)[index_con_parents_d] ) areas_cnt_text_d=list(np.array(areas_cnt_text_d)[index_con_parents_d] ) cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contoures([contours_biggest_d]) cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contoures(contours_only_text_parent_d) try: cx_bigest_d_last5 = cx_bigest_d[-5:] cy_biggest_d_last5 = cy_biggest_d[-5:] dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) cx_bigest_d_big[0] = cx_bigest_d[ind_largest] cy_biggest_d_big[0] = cy_biggest_d[ind_largest] except Exception as why: self.logger.error(why) (h, w) = text_only.shape[:2] center = (w // 2.0, h // 2.0) M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) M_22 = np.array(M)[:2, :2] p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) x_diff = p_big[0] - cx_bigest_d_big y_diff = p_big[1] - cy_biggest_d_big # print(p_big) # print(cx_bigest_d_big,cy_biggest_d_big) # print(x_diff,y_diff) contours_only_text_parent_d_ordered = [] for i in range(len(contours_only_text_parent)): # img1=np.zeros((text_only.shape[0],text_only.shape[1],3)) # img1=cv2.fillPoly(img1,pts=[contours_only_text_parent[i]] ,color=(1,1,1)) # plt.imshow(img1[:,:,0]) # plt.show() p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) # print(p) p[0] = p[0] - x_diff[0] p[1] = p[1] - y_diff[0] # print(p) # print(cx_bigest_d) # print(cy_biggest_d) dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] # print(np.argmin(dists)) contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) # plt.imshow(img2[:,:,0]) # plt.show() else: contours_only_text, hir_on_text = return_contours_of_image(text_only) contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) areas_cnt_text = np.array([cv2.contourArea(contours_only_text_parent[j]) for j in range(len(contours_only_text_parent))]) areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] contours_only_text_parent = [contours_only_text_parent[jz] for jz in range(len(contours_only_text_parent)) if areas_cnt_text[jz] > min_con_area] areas_cnt_text_parent = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > min_con_area] index_con_parents = np.argsort(areas_cnt_text_parent) contours_only_text_parent = list(np.array(contours_only_text_parent)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contoures([contours_biggest]) cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contoures(contours_only_text_parent) self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) if not self.curved_line: slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: scale_param = 1 all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) all_found_texline_polygons = small_textlines_to_parent_adherence2(all_found_texline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, index_by_text_par_con_marginal, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_texline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_texline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) index_of_vertical_text_contours = np.array(range(len(slopes)))[(abs(np.array(slopes)) > 60)] K.clear_session() # print(index_by_text_par_con,'index_by_text_par_con') if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered)[index_by_text_par_con]) text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, slopes, _, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_texline_polygons, slopes, contours_only_text_parent_d_ordered) else: contours_only_text_parent_d_ordered = None text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_texline_polygons, slopes, contours_only_text_parent_d_ordered) if self.plotter: self.plotter.save_plot_of_layout(text_regions_p, image_page) self.plotter.save_plot_of_layout_all(text_regions_p, image_page) K.clear_session() polygons_of_tabels = [] pixel_img = 4 polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) all_found_texline_polygons = adhere_drop_capital_region_into_cprresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, kernel=KERNEL, curved_line=self.curved_line) # print(len(contours_only_text_parent_h),len(contours_only_text_parent_h_d_ordered),'contours_only_text_parent_h') pixel_lines = 6 if not self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: num_col, _, matrix_of_lines_ch, spliter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines, contours_only_text_parent_h) else: _, _, matrix_of_lines_ch_d, spliter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines, contours_only_text_parent_h_d_ordered) elif self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: num_col, peaks_neg_fin, matrix_of_lines_ch, spliter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines) else: num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, spliter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines) # print(peaks_neg_fin,peaks_neg_fin_d,'num_col2') # print(spliter_y_new,spliter_y_new_d,'num_col_classifier') # print(matrix_of_lines_ch.shape,matrix_of_lines_ch_d.shape,'matrix_of_lines_ch') if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: regions_without_seperators = regions_without_seperators.astype(np.uint8) regions_without_seperators = cv2.erode(regions_without_seperators[:, :], KERNEL, iterations=6) random_pixels_for_image = np.random.randn(regions_without_seperators.shape[0], regions_without_seperators.shape[1]) random_pixels_for_image[random_pixels_for_image < -0.5] = 0 random_pixels_for_image[random_pixels_for_image != 0] = 1 regions_without_seperators[(random_pixels_for_image[:, :] == 1) & (text_regions_p[:, :] == 5)] = 1 else: regions_without_seperators_d = regions_without_seperators_d.astype(np.uint8) regions_without_seperators_d = cv2.erode(regions_without_seperators_d[:, :], KERNEL, iterations=6) random_pixels_for_image = np.random.randn(regions_without_seperators_d.shape[0], regions_without_seperators_d.shape[1]) random_pixels_for_image[random_pixels_for_image < -0.5] = 0 random_pixels_for_image[random_pixels_for_image != 0] = 1 regions_without_seperators_d[(random_pixels_for_image[:, :] == 1) & (text_regions_p_1_n[:, :] == 5)] = 1 if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes = return_boxes_of_images_by_order_of_reading_new(spliter_y_new, regions_without_seperators, matrix_of_lines_ch, num_col_classifier) else: boxes_d = return_boxes_of_images_by_order_of_reading_new(spliter_y_new_d, regions_without_seperators_d, matrix_of_lines_ch_d, num_col_classifier) if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) if self.full_layout: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) self.write_into_page_xml(self.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_texline_polygons, all_found_texline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, polygons_of_tabels, polygons_of_drop_capitals, polygons_of_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals)) else: contours_only_text_parent_h = None if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) self.write_into_page_xml(self.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_texline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals)) self.logger.info("Job done in %ss", str(time.time() - t1))