# pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring,missing-class-docstring,too-many-branches
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
# pylint: disable=too-many-public-methods,too-many-arguments,too-many-instance-attributes
# pylint: disable=consider-using-enumerate
"""
document layout analysis (segmentation) with output in PAGE-XML
"""

from logging import Logger
from difflib import SequenceMatcher as sq
from PIL import Image, ImageDraw, ImageFont
import math
import os
import sys
import time
from typing import Optional
import atexit
import warnings
from functools import partial
from pathlib import Path
from multiprocessing import cpu_count
import gc
import copy
import json

from loky import ProcessPoolExecutor
import xml.etree.ElementTree as ET
import cv2
import numpy as np
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d
from numba import cuda

from ocrd import OcrdPage
from ocrd_utils import getLogger, tf_disable_interactive_logs

try:
    import torch
except ImportError:
    torch = None
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None
try:
    from transformers import TrOCRProcessor, VisionEncoderDecoderModel
except ImportError:
    TrOCRProcessor = VisionEncoderDecoderModel = None

#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
tf_disable_interactive_logs()
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.keras.models import load_model
tf.get_logger().setLevel("ERROR")
warnings.filterwarnings("ignore")
# use tf1 compatibility for keras backend
from tensorflow.compat.v1.keras.backend import set_session
from tensorflow.keras import layers
from tensorflow.keras.layers import StringLookup

from .utils.contour import (
    filter_contours_area_of_image,
    filter_contours_area_of_image_tables,
    find_contours_mean_y_diff,
    find_new_features_of_contours,
    find_features_of_contours,
    get_text_region_boxes_by_given_contours,
    get_textregion_contours_in_org_image,
    get_textregion_contours_in_org_image_light,
    return_contours_of_image,
    return_contours_of_interested_region,
    return_contours_of_interested_region_by_min_size,
    return_contours_of_interested_textline,
    return_parent_contours,
)
from .utils.rotate import (
    rotate_image,
    rotation_not_90_func,
    rotation_not_90_func_full_layout
)
from .utils.separate_lines import (
    textline_contours_postprocessing,
    separate_lines_new2,
    return_deskew_slop,
    do_work_of_slopes_new,
    do_work_of_slopes_new_curved,
    do_work_of_slopes_new_light,
)
from .utils.drop_capitals import (
    adhere_drop_capital_region_into_corresponding_textline,
    filter_small_drop_capitals_from_no_patch_layout
)
from .utils.marginals import get_marginals
from .utils.resize import resize_image
from .utils import (
    boosting_headers_by_longshot_region_segmentation,
    crop_image_inside_box,
    find_num_col,
    otsu_copy_binary,
    put_drop_out_from_only_drop_model,
    putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
    check_any_text_region_in_model_one_is_main_or_header,
    check_any_text_region_in_model_one_is_main_or_header_light,
    small_textlines_to_parent_adherence2,
    order_of_regions,
    find_number_of_columns_in_document,
    return_boxes_of_images_by_order_of_reading_new
)
from .utils.pil_cv2 import check_dpi, pil2cv
from .utils.xml import order_and_id_of_texts
from .plot import EynollahPlotter
from .writer import EynollahXmlWriter

MIN_AREA_REGION = 0.000001
SLOPE_THRESHOLD = 0.13
RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 # 98.45
DPI_THRESHOLD = 298
MAX_SLOPE = 999
KERNEL = np.ones((5, 5), np.uint8)

projection_dim = 64
patch_size = 1
num_patches = 21 * 21 #14*14 #28*28

class Patches(layers.Layer):
    def __init__(self, **kwargs):
        # forward Keras kwargs (e.g. name, trainable) to the base Layer
        super(Patches, self).__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'patch_size': self.patch_size,
        })
        return config

class PatchEncoder(layers.Layer):
    def __init__(self, **kwargs):
        # forward Keras kwargs (e.g. name, trainable) to the base Layer
        super(PatchEncoder, self).__init__(**kwargs)
        self.num_patches = num_patches
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'num_patches': self.num_patches,
            'projection': self.projection,
            'position_embedding': self.position_embedding,
        })
        return config
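# A minimal usage sketch of the two custom layers above (illustrative only,
# not executed here; the input shape is an assumption chosen so that the
# module-level num_patches = 21*21 fits): with patch_size = 1, each pixel of
# a 21x21 feature map becomes one 64-dimensional, position-aware token, as in
# a ViT-style encoder.
#
#     inputs = tf.keras.Input(shape=(21, 21, 3))
#     tokens = Patches()(inputs)        # -> (batch, 441, 3)
#     encoded = PatchEncoder()(tokens)  # -> (batch, 441, 64)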
class Eynollah:
    def __init__(
        self,
        dir_models : str,
        dir_out : Optional[str] = None,
        dir_of_cropped_images : Optional[str] = None,
        extract_only_images : bool = False,
        dir_of_layout : Optional[str] = None,
        dir_of_deskewed : Optional[str] = None,
        dir_of_all : Optional[str] = None,
        dir_save_page : Optional[str] = None,
        enable_plotting : bool = False,
        allow_enhancement : bool = False,
        curved_line : bool = False,
        textline_light : bool = False,
        full_layout : bool = False,
        tables : bool = False,
        right2left : bool = False,
        input_binary : bool = False,
        allow_scaling : bool = False,
        headers_off : bool = False,
        light_version : bool = False,
        ignore_page_extraction : bool = False,
        reading_order_machine_based : bool = False,
        do_ocr : bool = False,
        num_col_upper : Optional[int] = None,
        num_col_lower : Optional[int] = None,
        skip_layout_and_reading_order : bool = False,
        logger : Optional[Logger] = None,
    ):
        if skip_layout_and_reading_order:
            textline_light = True
        self.light_version = light_version
        self.dir_out = dir_out
        self.dir_of_all = dir_of_all
        self.dir_save_page = dir_save_page
        self.reading_order_machine_based = reading_order_machine_based
        self.dir_of_deskewed = dir_of_deskewed
        self.dir_of_cropped_images = dir_of_cropped_images
        self.dir_of_layout = dir_of_layout
        self.enable_plotting = enable_plotting
        self.allow_enhancement = allow_enhancement
        self.curved_line = curved_line
        self.textline_light = textline_light
        self.full_layout = full_layout
        self.tables = tables
        self.right2left = right2left
        self.input_binary = input_binary
        self.allow_scaling = allow_scaling
        self.headers_off = headers_off
        self.extract_only_images = extract_only_images
        self.ignore_page_extraction = ignore_page_extraction
        self.skip_layout_and_reading_order = skip_layout_and_reading_order
        self.ocr = do_ocr
        if num_col_upper:
            self.num_col_upper = int(num_col_upper)
        else:
            self.num_col_upper = num_col_upper
        if num_col_lower:
            self.num_col_lower = int(num_col_lower)
        else:
            self.num_col_lower = num_col_lower
        self.logger = logger if logger else getLogger('eynollah')
        # for parallelization of CPU-intensive tasks:
        self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200)
        atexit.register(self.executor.shutdown)
        self.dir_models = dir_models
        self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425"
        self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425"
        self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425"
        self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425"
        self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425"
        #"/modelens_full_lay_1_3_031124" #"/modelens_full_lay_13__3_19_241024" #"/model_full_lay_13_241024"
        #"/modelens_full_lay_13_17_231024" #"/modelens_full_lay_1_2_221024" #"/eynollah-full-regions-1column_20210425"
        self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_1__4_3_091124"
        #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425"
        self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425"
        self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425"
        self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314"
        self.model_region_dir_p_ens_light_only_images_extraction = \
            dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18"
        self.model_reading_order_dir = dir_models + "/model_mb_ro_aug_2"
        #"/model_ens_reading_order_machine_based" #"/modelens_12sp_elay_0_3_4__3_6_n"
        #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8" #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"
        #"/modelens_1_2_4_5_early_lay_1_2_spaltige" #"/model_3_eraly_layout_no_patches_1_2_spaltige"
        self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"
        ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans"
        #"/modelens_full_lay_1_3_031124" #"/modelens_full_lay_13__3_19_241024" #"/model_full_lay_13_241024"
        #"/modelens_full_lay_13_17_231024" #"/modelens_full_lay_1_2_221024" #"/modelens_full_layout_24_till_28"
        #"/model_2_full_layout_new_trans"
        self.model_region_dir_fully = dir_models + "/modelens_full_lay_1__4_3_091124"
        if self.textline_light:
            #"/modelens_textline_1_4_16092024" #"/model_textline_ens_3_4_5_6_artificial"
            #"/modelens_textline_1_3_4_20240915" #"/modelens_textline_9_12_13_14_15"
            #"/eynollah-textline_light_20210425"
            self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"
        else:
            #"/eynollah-textline_20210425"
            self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"
        if self.ocr:
            self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
        if self.tables:
            if self.light_version:
                self.model_table_dir = dir_models + "/modelens_table_0t4_201124"
            else:
                self.model_table_dir = dir_models + "/eynollah-tables_20210319"

        # #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
        # #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True)
        # #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
        # config = tf.compat.v1.ConfigProto()
        # config.gpu_options.allow_growth = True
        # #session = tf.InteractiveSession()
        # session = tf.compat.v1.Session(config=config)
        # set_session(session)
        try:
            for device in tf.config.list_physical_devices('GPU'):
                tf.config.experimental.set_memory_growth(device, True)
        except Exception:
            self.logger.warning("no GPU device available")
        self.model_page = self.our_load_model(self.model_page_dir)
        self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
        self.model_bin = self.our_load_model(self.model_dir_of_binarization)
        if self.extract_only_images:
            self.model_region = self.our_load_model(self.model_region_dir_p_ens_light_only_images_extraction)
        else:
            self.model_textline = self.our_load_model(self.model_textline_dir)
            if self.light_version:
                self.model_region = self.our_load_model(self.model_region_dir_p_ens_light)
                self.model_region_1_2 = self.our_load_model(self.model_region_dir_p_1_2_sp_np)
            else:
                self.model_region = self.our_load_model(self.model_region_dir_p_ens)
                self.model_region_p2 = self.our_load_model(self.model_region_dir_p2)
                self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement)
            ###self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new)
            self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np)
            self.model_region_fl = self.our_load_model(self.model_region_dir_fully)
            if self.reading_order_machine_based:
                self.model_reading_order = self.our_load_model(self.model_reading_order_dir)
            if self.ocr:
                self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
                self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
                #("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten")
                self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
            if self.tables:
                self.model_table = self.our_load_model(self.model_table_dir)

    def cache_images(self, image_filename=None, image_pil=None, dpi=None):
        ret = {}
        t_c0 = time.time()
        if image_filename:
            ret['img'] = cv2.imread(image_filename)
            if self.light_version:
                self.dpi = 100
            else:
                self.dpi = check_dpi(image_filename)
        else:
            ret['img'] = pil2cv(image_pil)
            if self.light_version:
                self.dpi = 100
            else:
                self.dpi = check_dpi(image_pil)
        ret['img_grayscale'] = cv2.cvtColor(ret['img'], cv2.COLOR_BGR2GRAY)
        for prefix in ('', '_grayscale'):
            ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8)
        self._imgs = ret
        if dpi is not None:
            self.dpi = dpi

    def reset_file_name_dir(self, image_filename):
        t_c = time.time()
        self.cache_images(image_filename=image_filename)
        self.plotter = None if not self.enable_plotting else EynollahPlotter(
            dir_out=self.dir_out,
            dir_of_all=self.dir_of_all,
            dir_save_page=self.dir_save_page,
            dir_of_deskewed=self.dir_of_deskewed,
            dir_of_cropped_images=self.dir_of_cropped_images,
            dir_of_layout=self.dir_of_layout,
            image_filename_stem=Path(Path(image_filename).name).stem)
        self.writer = EynollahXmlWriter(
            dir_out=self.dir_out,
            image_filename=image_filename,
            curved_line=self.curved_line,
            textline_light=self.textline_light)

    def imread(self, grayscale=False, uint8=True):
        key = 'img'
        if grayscale:
            key += '_grayscale'
        if uint8:
            key += '_uint8'
        return self._imgs[key].copy()

    def isNaN(self, num):
        return num != num
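    # Worked example for the tiling used by predict_enhancement() below and
    # do_prediction() further down (the tile size here is hypothetical; the
    # real one is read from the model's output shape): with a 448-px tile,
    # margin 0 and a 1000-px-wide image, nxf = ceil(1000/448) = 3 tiles start
    # at x = 0, 448 and, clamped to the border, 1000 - 448 = 552, so the last
    # tile overlaps its neighbour instead of reading past the image.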
    def predict_enhancement(self, img):
        self.logger.debug("enter predict_enhancement")
        img_height_model = self.model_enhancement.layers[-1].output_shape[1]
        img_width_model = self.model_enhancement.layers[-1].output_shape[2]
        # pad up to the model's input size if the image is smaller
        # (cv2.resize expects dsize as (width, height))
        if img.shape[0] < img_height_model:
            img = cv2.resize(img, (img.shape[1], img_height_model), interpolation=cv2.INTER_NEAREST)
        if img.shape[1] < img_width_model:
            img = cv2.resize(img, (img_width_model, img.shape[0]), interpolation=cv2.INTER_NEAREST)
        margin = int(0 * img_width_model)
        width_mid = img_width_model - 2 * margin
        height_mid = img_height_model - 2 * margin
        img = img / 255.
        img_h = img.shape[0]
        img_w = img.shape[1]
        prediction_true = np.zeros((img_h, img_w, 3))
        nxf = img_w / float(width_mid)
        nyf = img_h / float(height_mid)
        nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf)
        nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf)

        for i in range(nxf):
            for j in range(nyf):
                index_x_d = i * width_mid
                index_x_u = index_x_d + img_width_model
                index_y_d = j * height_mid
                index_y_u = index_y_d + img_height_model
                if index_x_u > img_w:
                    index_x_u = img_w
                    index_x_d = img_w - img_width_model
                if index_y_u > img_h:
                    index_y_u = img_h
                    index_y_d = img_h - img_height_model

                img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :]
                label_p_pred = self.model_enhancement.predict(img_patch, verbose=0)
                seg = label_p_pred[0, :, :, :] * 255

                if i == 0 and j == 0:
                    prediction_true[index_y_d + 0:index_y_u - margin,
                                    index_x_d + 0:index_x_u - margin] = \
                        seg[0:-margin or None, 0:-margin or None]
                elif i == nxf - 1 and j == nyf - 1:
                    prediction_true[index_y_d + margin:index_y_u - 0,
                                    index_x_d + margin:index_x_u - 0] = \
                        seg[margin:, margin:]
                elif i == 0 and j == nyf - 1:
                    prediction_true[index_y_d + margin:index_y_u - 0,
                                    index_x_d + 0:index_x_u - margin] = \
                        seg[margin:, 0:-margin or None]
                elif i == nxf - 1 and j == 0:
                    prediction_true[index_y_d + 0:index_y_u - margin,
                                    index_x_d + margin:index_x_u - 0] = \
                        seg[0:-margin or None, margin:]
                elif i == 0 and j != 0 and j != nyf - 1:
                    prediction_true[index_y_d + margin:index_y_u - margin,
                                    index_x_d + 0:index_x_u - margin] = \
                        seg[margin:-margin or None, 0:-margin or None]
                elif i == nxf - 1 and j != 0 and j != nyf - 1:
                    prediction_true[index_y_d + margin:index_y_u - margin,
                                    index_x_d + margin:index_x_u - 0] = \
                        seg[margin:-margin or None, margin:]
                elif i != 0 and i != nxf - 1 and j == 0:
                    prediction_true[index_y_d + 0:index_y_u - margin,
                                    index_x_d + margin:index_x_u - margin] = \
                        seg[0:-margin or None, margin:-margin or None]
                elif i != 0 and i != nxf - 1 and j == nyf - 1:
                    prediction_true[index_y_d + margin:index_y_u - 0,
                                    index_x_d + margin:index_x_u - margin] = \
                        seg[margin:, margin:-margin or None]
                else:
                    prediction_true[index_y_d + margin:index_y_u - margin,
                                    index_x_d + margin:index_x_u - margin] = \
                        seg[margin:-margin or None, margin:-margin or None]

        prediction_true = prediction_true.astype(int)
        return prediction_true
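    # Target widths used by calculate_width_height_by_columns() below,
    # summarized for readability (inside the listed band the image keeps its
    # early width):
    #
    #   num_col   upscale below   target width   downscale above
    #   1         1100            2000           2500
    #   2         2000            2400           3500
    #   3         2000            3000           4000
    #   4         2500            4000           5000
    #   5         3700            5000           7000
    #   6         4500            6500           -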
    def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred):
        self.logger.debug("enter calculate_width_height_by_columns")
        if num_col == 1 and width_early < 1100:
            img_w_new = 2000
        elif num_col == 1 and width_early >= 2500:
            img_w_new = 2000
        elif num_col == 1 and width_early >= 1100 and width_early < 2500:
            img_w_new = width_early
        elif num_col == 2 and width_early < 2000:
            img_w_new = 2400
        elif num_col == 2 and width_early >= 3500:
            img_w_new = 2400
        elif num_col == 2 and width_early >= 2000 and width_early < 3500:
            img_w_new = width_early
        elif num_col == 3 and width_early < 2000:
            img_w_new = 3000
        elif num_col == 3 and width_early >= 4000:
            img_w_new = 3000
        elif num_col == 3 and width_early >= 2000 and width_early < 4000:
            img_w_new = width_early
        elif num_col == 4 and width_early < 2500:
            img_w_new = 4000
        elif num_col == 4 and width_early >= 5000:
            img_w_new = 4000
        elif num_col == 4 and width_early >= 2500 and width_early < 5000:
            img_w_new = width_early
        elif num_col == 5 and width_early < 3700:
            img_w_new = 5000
        elif num_col == 5 and width_early >= 7000:
            img_w_new = 5000
        elif num_col == 5 and width_early >= 3700 and width_early < 7000:
            img_w_new = width_early
        elif num_col == 6 and width_early < 4500:
            img_w_new = 6500 # 5400
        else:
            img_w_new = width_early
        img_h_new = img_w_new * img.shape[0] // img.shape[1]

        if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early:
            img_new = np.copy(img)
            num_column_is_classified = False
        #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000:
        elif img_h_new >= 8000:
            img_new = np.copy(img)
            num_column_is_classified = False
        else:
            img_new = resize_image(img, img_h_new, img_w_new)
            num_column_is_classified = True
        return img_new, num_column_is_classified

    def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred):
        self.logger.debug("enter calculate_width_height_by_columns_1_2")
        if num_col == 1:
            img_w_new = 1000
        else:
            img_w_new = 1300
        img_h_new = img_w_new * img.shape[0] // img.shape[1]

        if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early:
            img_new = np.copy(img)
            num_column_is_classified = False
        #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000:
        elif img_h_new >= 8000:
            img_new = np.copy(img)
            num_column_is_classified = False
        else:
            img_new = resize_image(img, img_h_new, img_w_new)
            num_column_is_classified = True
        return img_new, num_column_is_classified

    def calculate_width_height_by_columns_extract_only_images(self, img, num_col, width_early, label_p_pred):
        self.logger.debug("enter calculate_width_height_by_columns_extract_only_images")
        if num_col == 1:
            img_w_new = 700
        elif num_col == 2:
            img_w_new = 900
        elif num_col == 3:
            img_w_new = 1500
        elif num_col == 4:
            img_w_new = 1800
        elif num_col == 5:
            img_w_new = 2200
        elif num_col == 6:
            img_w_new = 2500
        else:
            # fallback so img_w_new is always defined (the classifier only
            # predicts 1..6 columns, but guard against unexpected values)
            img_w_new = width_early
        img_h_new = img_w_new * img.shape[0] // img.shape[1]
        img_new = resize_image(img, img_h_new, img_w_new)
        num_column_is_classified = True
        return img_new, num_column_is_classified

    def resize_image_with_column_classifier(self, is_image_enhanced, img_bin):
        self.logger.debug("enter resize_image_with_column_classifier")
        if self.input_binary:
            img = np.copy(img_bin)
        else:
            img = self.imread()
        _, page_coord = self.early_page_for_num_of_column_classification(img)

        if self.input_binary:
            img_in = np.copy(img)
            img_in = img_in / 255.0
            width_early = img_in.shape[1]
            img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST)
            img_in = img_in.reshape(1, 448, 448, 3)
        else:
            img_1ch = self.imread(grayscale=True, uint8=False)
            width_early = img_1ch.shape[1]
            img_1ch = img_1ch[page_coord[0]:page_coord[1], page_coord[2]:page_coord[3]]
            # plt.imshow(img_1ch)
            # plt.show()
            img_1ch = img_1ch / 255.0
            img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST)
            img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
            img_in[0, :, :, 0] = img_1ch[:, :]
            img_in[0, :, :, 1] = img_1ch[:, :]
            img_in[0, :, :, 2] = img_1ch[:, :]

        label_p_pred = self.model_classifier.predict(img_in, verbose=0)
        num_col = np.argmax(label_p_pred[0]) + 1
        self.logger.info("Found %s columns (%s)", num_col, label_p_pred)
        img_new, _ = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred)

        if img_new.shape[1] > img.shape[1]:
            img_new = self.predict_enhancement(img_new)
            is_image_enhanced = True
        return img, img_new, is_image_enhanced
    def resize_and_enhance_image_with_column_classifier(self, light_version):
        self.logger.debug("enter resize_and_enhance_image_with_column_classifier")
        dpi = self.dpi
        self.logger.info("Detected %s DPI", dpi)
        if self.input_binary:
            img = self.imread()
            prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5)
            prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
            prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8)
            img = np.copy(prediction_bin)
            img_bin = prediction_bin
        else:
            img = self.imread()
            img_bin = None

        width_early = img.shape[1]
        t1 = time.time()
        _, page_coord = self.early_page_for_num_of_column_classification(img_bin)

        self.image_page_org_size = img[page_coord[0]:page_coord[1], page_coord[2]:page_coord[3], :]
        self.page_coord = page_coord

        if self.num_col_upper and not self.num_col_lower:
            num_col = self.num_col_upper
            label_p_pred = [np.ones(6)]
        elif self.num_col_lower and not self.num_col_upper:
            num_col = self.num_col_lower
            label_p_pred = [np.ones(6)]
        elif not self.num_col_upper and not self.num_col_lower:
            if self.input_binary:
                img_in = np.copy(img)
                img_in = img_in / 255.0
                img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST)
                img_in = img_in.reshape(1, 448, 448, 3)
            else:
                img_1ch = self.imread(grayscale=True)
                width_early = img_1ch.shape[1]
                img_1ch = img_1ch[page_coord[0]:page_coord[1], page_coord[2]:page_coord[3]]
                img_1ch = img_1ch / 255.0
                img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST)
                img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
                img_in[0, :, :, 0] = img_1ch[:, :]
                img_in[0, :, :, 1] = img_1ch[:, :]
                img_in[0, :, :, 2] = img_1ch[:, :]

            label_p_pred = self.model_classifier.predict(img_in, verbose=0)
            num_col = np.argmax(label_p_pred[0]) + 1
        elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper != self.num_col_lower):
            if self.input_binary:
                img_in = np.copy(img)
                img_in = img_in / 255.0
                img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST)
                img_in = img_in.reshape(1, 448, 448, 3)
            else:
                img_1ch = self.imread(grayscale=True)
                width_early = img_1ch.shape[1]
                img_1ch = img_1ch[page_coord[0]:page_coord[1], page_coord[2]:page_coord[3]]
                img_1ch = img_1ch / 255.0
                img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST)
                img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
                img_in[0, :, :, 0] = img_1ch[:, :]
                img_in[0, :, :, 1] = img_1ch[:, :]
                img_in[0, :, :, 2] = img_1ch[:, :]

            label_p_pred = self.model_classifier.predict(img_in, verbose=0)
            num_col = np.argmax(label_p_pred[0]) + 1
            if num_col > self.num_col_upper:
                num_col = self.num_col_upper
                label_p_pred = [np.ones(6)]
            if num_col < self.num_col_lower:
                num_col = self.num_col_lower
                label_p_pred = [np.ones(6)]
        else:
            num_col = self.num_col_upper
            label_p_pred = [np.ones(6)]

        self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5))
        if not self.extract_only_images:
            if dpi < DPI_THRESHOLD:
                if light_version and num_col in (1, 2):
                    img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(
                        img, num_col, width_early, label_p_pred)
                else:
                    img_new, num_column_is_classified = self.calculate_width_height_by_columns(
                        img, num_col, width_early, label_p_pred)
                if light_version:
                    image_res = np.copy(img_new)
                else:
                    image_res = self.predict_enhancement(img_new)
                is_image_enhanced = True
            else:
                if light_version and num_col in (1, 2):
                    img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(
                        img, num_col, width_early, label_p_pred)
                    image_res = np.copy(img_new)
                    is_image_enhanced = True
                else:
                    num_column_is_classified = True
                    image_res = np.copy(img)
                    is_image_enhanced = False
        else:
            num_column_is_classified = True
            image_res = np.copy(img)
            is_image_enhanced = False

        self.logger.debug("exit resize_and_enhance_image_with_column_classifier")
        return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin
    # pylint: disable=attribute-defined-outside-init
    def get_image_and_scales(self, img_org, img_res, scale):
        self.logger.debug("enter get_image_and_scales")
        self.image = np.copy(img_res)
        self.image_org = np.copy(img_org)
        self.height_org = self.image.shape[0]
        self.width_org = self.image.shape[1]

        self.img_hight_int = int(self.image.shape[0] * scale)
        self.img_width_int = int(self.image.shape[1] * scale)
        self.scale_y = self.img_hight_int / float(self.image.shape[0])
        self.scale_x = self.img_width_int / float(self.image.shape[1])

        self.image = resize_image(self.image, self.img_hight_int, self.img_width_int)

        # Also set for the plotter
        if self.plotter:
            self.plotter.image_org = self.image_org
            self.plotter.scale_y = self.scale_y
            self.plotter.scale_x = self.scale_x
        # Also set for the writer
        self.writer.image_org = self.image_org
        self.writer.scale_y = self.scale_y
        self.writer.scale_x = self.scale_x
        self.writer.height_org = self.height_org
        self.writer.width_org = self.width_org

    def get_image_and_scales_after_enhancing(self, img_org, img_res):
        self.logger.debug("enter get_image_and_scales_after_enhancing")
        self.image = np.copy(img_res)
        self.image = self.image.astype(np.uint8)
        self.image_org = np.copy(img_org)
        self.height_org = self.image_org.shape[0]
        self.width_org = self.image_org.shape[1]

        self.scale_y = img_res.shape[0] / float(self.image_org.shape[0])
        self.scale_x = img_res.shape[1] / float(self.image_org.shape[1])

        # Also set for the plotter
        if self.plotter:
            self.plotter.image_org = self.image_org
            self.plotter.scale_y = self.scale_y
            self.plotter.scale_x = self.scale_x
        # Also set for the writer
        self.writer.image_org = self.image_org
        self.writer.scale_y = self.scale_y
        self.writer.scale_x = self.scale_x
        self.writer.height_org = self.height_org
        self.writer.width_org = self.width_org
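    # do_prediction() below stitches overlapping tiles by keeping only the
    # centre of each prediction: with marginal_of_patch_percent = 0.1 and a
    # hypothetical 448-px tile, margin = 44 and the stride becomes
    # 448 - 2*44 = 360; interior tiles contribute their central 360x360
    # pixels, border tiles additionally keep their outer edge, and where
    # clamped border tiles overlap, the later write wins.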
    def do_prediction(
            self, patches, img, model,
            n_batch_inference=1, marginal_of_patch_percent=0.1,
            thresholding_for_some_classes_in_light_version=False,
            thresholding_for_artificial_class_in_light_version=False):

        self.logger.debug("enter do_prediction")
        img_height_model = model.layers[-1].output_shape[1]
        img_width_model = model.layers[-1].output_shape[2]

        if not patches:
            img_h_page = img.shape[0]
            img_w_page = img.shape[1]
            img = img / float(255.0)
            img = resize_image(img, img_height_model, img_width_model)

            label_p_pred = model.predict(img[np.newaxis], verbose=0)
            seg = np.argmax(label_p_pred, axis=3)[0]

            if thresholding_for_artificial_class_in_light_version:
                seg_art = label_p_pred[0,:,:,2]
                seg_art[seg_art<0.2] = 0
                seg_art[seg_art>0] = 1
                seg[seg_art==1] = 2

            seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
            prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8)
            return prediction_true

        if img.shape[0] < img_height_model:
            img = resize_image(img, img_height_model, img.shape[1])
        if img.shape[1] < img_width_model:
            img = resize_image(img, img.shape[0], img_width_model)

        self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model)
        margin = int(marginal_of_patch_percent * img_height_model)
        width_mid = img_width_model - 2 * margin
        height_mid = img_height_model - 2 * margin
        img = img / 255.
        #img = img.astype(np.float16)
        img_h = img.shape[0]
        img_w = img.shape[1]
        prediction_true = np.zeros((img_h, img_w, 3))
        mask_true = np.zeros((img_h, img_w))
        nxf = img_w / float(width_mid)
        nyf = img_h / float(height_mid)
        nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf)
        nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf)

        list_i_s = []
        list_j_s = []
        list_x_u = []
        list_x_d = []
        list_y_u = []
        list_y_d = []

        batch_indexer = 0
        img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))
        for i in range(nxf):
            for j in range(nyf):
                index_x_d = i * width_mid
                index_x_u = index_x_d + img_width_model
                index_y_d = j * height_mid
                index_y_u = index_y_d + img_height_model
                if index_x_u > img_w:
                    index_x_u = img_w
                    index_x_d = img_w - img_width_model
                if index_y_u > img_h:
                    index_y_u = img_h
                    index_y_d = img_h - img_height_model

                list_i_s.append(i)
                list_j_s.append(j)
                list_x_u.append(index_x_u)
                list_x_d.append(index_x_d)
                list_y_d.append(index_y_d)
                list_y_u.append(index_y_u)

                img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
                batch_indexer += 1

                if (batch_indexer == n_batch_inference or
                        # last batch
                        i == nxf - 1 and j == nyf - 1):

                    self.logger.debug("predicting patches on %s", str(img_patch.shape))
                    label_p_pred = model.predict(img_patch, verbose=0)
                    seg = np.argmax(label_p_pred, axis=3)

                    if thresholding_for_some_classes_in_light_version:
                        seg_not_base = label_p_pred[:,:,:,4]
                        seg_not_base[seg_not_base>0.03] = 1
                        seg_not_base[seg_not_base<1] = 0

                        seg_line = label_p_pred[:,:,:,3]
                        seg_line[seg_line>0.1] = 1
                        seg_line[seg_line<1] = 0

                        seg_background = label_p_pred[:,:,:,0]
                        seg_background[seg_background>0.25] = 1
                        seg_background[seg_background<1] = 0

                        seg[seg_not_base==1] = 4
                        seg[seg_background==1] = 0
                        seg[(seg_line==1) & (seg==0)] = 3
                    if thresholding_for_artificial_class_in_light_version:
                        seg_art = label_p_pred[:,:,:,2]
                        seg_art[seg_art<0.2] = 0
                        seg_art[seg_art>0] = 1
                        seg[seg_art==1] = 2

                    indexer_inside_batch = 0
                    for i_batch, j_batch in zip(list_i_s, list_j_s):
                        seg_in = seg[indexer_inside_batch]

                        index_y_u_in = list_y_u[indexer_inside_batch]
                        index_y_d_in = list_y_d[indexer_inside_batch]
                        index_x_u_in = list_x_u[indexer_inside_batch]
                        index_x_d_in = list_x_d[indexer_inside_batch]

                        if i_batch == 0 and j_batch == 0:
                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                            index_x_d_in + 0:index_x_u_in - margin] = \
                                seg_in[0:-margin or None, 0:-margin or None, np.newaxis]
                        elif i_batch == nxf - 1 and j_batch == nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                            index_x_d_in + margin:index_x_u_in - 0] = \
                                seg_in[margin:, margin:, np.newaxis]
                        elif i_batch == 0 and j_batch == nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                            index_x_d_in + 0:index_x_u_in - margin] = \
                                seg_in[margin:, 0:-margin or None, np.newaxis]
                        elif i_batch == nxf - 1 and j_batch == 0:
                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - 0] = \
                                seg_in[0:-margin or None, margin:, np.newaxis]
                        elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                            index_x_d_in + 0:index_x_u_in - margin] = \
                                seg_in[margin:-margin or None, 0:-margin or None, np.newaxis]
                        elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - 0] = \
                                seg_in[margin:-margin or None, margin:, np.newaxis]
                        elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0:
                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - margin] = \
                                seg_in[0:-margin or None, margin:-margin or None, np.newaxis]
                        elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                            index_x_d_in + margin:index_x_u_in - margin] = \
                                seg_in[margin:, margin:-margin or None, np.newaxis]
                        else:
                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - margin] = \
                                seg_in[margin:-margin or None, margin:-margin or None, np.newaxis]
                        indexer_inside_batch += 1

                    list_i_s = []
                    list_j_s = []
                    list_x_u = []
                    list_x_d = []
                    list_y_u = []
                    list_y_d = []

                    batch_indexer = 0
                    img_patch[:] = 0

        prediction_true = prediction_true.astype(np.uint8)
        #del model
        gc.collect()
        return prediction_true
    def do_padding_with_scale(self, img, scale):
        h_n = int(img.shape[0] * scale)
        w_n = int(img.shape[1] * scale)

        channel0_avg = int(np.mean(img[:,:,0]))
        channel1_avg = int(np.mean(img[:,:,1]))
        channel2_avg = int(np.mean(img[:,:,2]))

        h_diff = img.shape[0] - h_n
        w_diff = img.shape[1] - w_n

        h_start = int(0.5 * h_diff)
        w_start = int(0.5 * w_diff)

        img_res = resize_image(img, h_n, w_n)
        #label_res = resize_image(label, h_n, w_n)

        img_scaled_padded = np.copy(img)
        #label_scaled_padded = np.zeros(label.shape)

        img_scaled_padded[:,:,0] = channel0_avg
        img_scaled_padded[:,:,1] = channel1_avg
        img_scaled_padded[:,:,2] = channel2_avg

        img_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n, :] = img_res[:,:,:]
        #label_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n, :] = label_res[:,:,:]

        return img_scaled_padded #, label_scaled_padded
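    # do_prediction_new_concept_scatter_nd() below reassembles overlapping
    # patch predictions with tf.scatter_nd instead of Python slicing: every
    # patch pixel carries its page (y, x) coordinate, the coordinates in the
    # overlap margin are zeroed out (so those contributions collapse onto
    # pixel (0, 0)), and scatter_nd writes all patch centres back to their
    # page positions in one vectorized call before the final argmax.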
    def do_prediction_new_concept_scatter_nd(
            self, patches, img, model,
            n_batch_inference=1, marginal_of_patch_percent=0.1,
            thresholding_for_some_classes_in_light_version=False,
            thresholding_for_artificial_class_in_light_version=False):

        self.logger.debug("enter do_prediction_new_concept_scatter_nd")
        img_height_model = model.layers[-1].output_shape[1]
        img_width_model = model.layers[-1].output_shape[2]

        if not patches:
            img_h_page = img.shape[0]
            img_w_page = img.shape[1]
            img = img / 255.0
            img = resize_image(img, img_height_model, img_width_model)

            label_p_pred = model.predict(img[np.newaxis], verbose=0)
            seg = np.argmax(label_p_pred, axis=3)[0]

            if thresholding_for_artificial_class_in_light_version:
                #seg_text = label_p_pred[0,:,:,1]
                #seg_text[seg_text<0.2] = 0
                #seg_text[seg_text>0] = 1
                #seg[seg_text==1] = 1

                seg_art = label_p_pred[0,:,:,4]
                seg_art[seg_art<0.2] = 0
                seg_art[seg_art>0] = 1
                seg[seg_art==1] = 4

            seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
            prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8)
            return prediction_true

        if img.shape[0] < img_height_model:
            img = resize_image(img, img_height_model, img.shape[1])
        if img.shape[1] < img_width_model:
            img = resize_image(img, img.shape[0], img_width_model)

        self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model)
        ##margin = int(marginal_of_patch_percent * img_height_model)
        #width_mid = img_width_model - 2 * margin
        #height_mid = img_height_model - 2 * margin
        img = img / 255.0
        img = img.astype(np.float16)
        img_h = img.shape[0]
        img_w = img.shape[1]

        stride_x = img_width_model - 100
        stride_y = img_height_model - 100

        one_tensor = tf.ones_like(img)
        img_patches, one_patches = tf.image.extract_patches(
            images=[img, one_tensor],
            sizes=[1, img_height_model, img_width_model, 1],
            strides=[1, stride_y, stride_x, 1],
            rates=[1, 1, 1, 1],
            padding='SAME')
        img_patches = tf.squeeze(img_patches)
        one_patches = tf.squeeze(one_patches)
        img_patches_resh = tf.reshape(img_patches,
                                      shape=(img_patches.shape[0] * img_patches.shape[1],
                                             img_height_model, img_width_model, 3))
        pred_patches = model.predict(img_patches_resh, batch_size=n_batch_inference)
        one_patches = tf.reshape(one_patches,
                                 shape=(img_patches.shape[0] * img_patches.shape[1],
                                        img_height_model, img_width_model, 3))
        x = tf.range(img.shape[1])
        y = tf.range(img.shape[0])
        x, y = tf.meshgrid(x, y)
        indices = tf.stack([y, x], axis=-1)

        indices_patches = tf.image.extract_patches(
            images=tf.expand_dims(indices, axis=0),
            sizes=[1, img_height_model, img_width_model, 1],
            strides=[1, stride_y, stride_x, 1],
            rates=[1, 1, 1, 1],
            padding='SAME')
        indices_patches = tf.squeeze(indices_patches)
        indices_patches = tf.reshape(indices_patches,
                                     shape=(img_patches.shape[0] * img_patches.shape[1],
                                            img_height_model, img_width_model, 2))
        margin_y = int(0.5 * (img_height_model - stride_y))
        margin_x = int(0.5 * (img_width_model - stride_x))

        mask_margin = np.zeros((img_height_model, img_width_model))
        mask_margin[margin_y:img_height_model - margin_y,
                    margin_x:img_width_model - margin_x] = 1

        indices_patches_array = indices_patches.numpy()
        for i in range(indices_patches_array.shape[0]):
            indices_patches_array[i,:,:,0] = indices_patches_array[i,:,:,0] * mask_margin
            indices_patches_array[i,:,:,1] = indices_patches_array[i,:,:,1] * mask_margin

        reconstructed = tf.scatter_nd(
            indices=indices_patches_array,
            updates=pred_patches,
            shape=(img.shape[0], img.shape[1], pred_patches.shape[-1])).numpy()

        prediction_true = np.argmax(reconstructed, axis=2).astype(np.uint8)
        gc.collect()
        return np.repeat(prediction_true[:, :, np.newaxis], 3, axis=2)
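    # do_prediction_new_concept() below is the slicing-based counterpart of
    # the scatter_nd variant above; in addition to the label map it returns a
    # per-pixel confidence matrix taken from the probability of class 1
    # (text), assembled with the same margin bookkeeping as the labels.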
    def do_prediction_new_concept(
            self, patches, img, model,
            n_batch_inference=1, marginal_of_patch_percent=0.1,
            thresholding_for_some_classes_in_light_version=False,
            thresholding_for_artificial_class_in_light_version=False):

        self.logger.debug("enter do_prediction_new_concept")
        img_height_model = model.layers[-1].output_shape[1]
        img_width_model = model.layers[-1].output_shape[2]

        if not patches:
            img_h_page = img.shape[0]
            img_w_page = img.shape[1]
            img = img / 255.0
            img = resize_image(img, img_height_model, img_width_model)

            label_p_pred = model.predict(img[np.newaxis], verbose=0)
            seg = np.argmax(label_p_pred, axis=3)[0]

            if thresholding_for_artificial_class_in_light_version:
                #seg_text = label_p_pred[0,:,:,1]
                #seg_text[seg_text<0.2] = 0
                #seg_text[seg_text>0] = 1
                #seg[seg_text==1] = 1

                seg_art = label_p_pred[0,:,:,4]
                seg_art[seg_art<0.2] = 0
                seg_art[seg_art>0] = 1
                seg[seg_art==1] = 4

            seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
            prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8)
            return prediction_true, resize_image(label_p_pred[0, :, :, 1], img_h_page, img_w_page)

        if img.shape[0] < img_height_model:
            img = resize_image(img, img_height_model, img.shape[1])
        if img.shape[1] < img_width_model:
            img = resize_image(img, img.shape[0], img_width_model)

        self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model)
        margin = int(marginal_of_patch_percent * img_height_model)
        width_mid = img_width_model - 2 * margin
        height_mid = img_height_model - 2 * margin
        img = img / 255.0
        img = img.astype(np.float16)
        img_h = img.shape[0]
        img_w = img.shape[1]
        prediction_true = np.zeros((img_h, img_w, 3))
        confidence_matrix = np.zeros((img_h, img_w))
        mask_true = np.zeros((img_h, img_w))
        nxf = img_w / float(width_mid)
        nyf = img_h / float(height_mid)
        nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf)
        nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf)

        list_i_s = []
        list_j_s = []
        list_x_u = []
        list_x_d = []
        list_y_u = []
        list_y_d = []

        batch_indexer = 0
        img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))
        for i in range(nxf):
            for j in range(nyf):
                index_x_d = i * width_mid
                index_x_u = index_x_d + img_width_model
                index_y_d = j * height_mid
                index_y_u = index_y_d + img_height_model
                if index_x_u > img_w:
                    index_x_u = img_w
                    index_x_d = img_w - img_width_model
                if index_y_u > img_h:
                    index_y_u = img_h
                    index_y_d = img_h - img_height_model

                list_i_s.append(i)
                list_j_s.append(j)
                list_x_u.append(index_x_u)
                list_x_d.append(index_x_d)
                list_y_d.append(index_y_d)
                list_y_u.append(index_y_u)

                img_patch[batch_indexer] = img[index_y_d:index_y_u, index_x_d:index_x_u]
                batch_indexer += 1

                if (batch_indexer == n_batch_inference or
                        # last batch
                        i == nxf - 1 and j == nyf - 1):

                    self.logger.debug("predicting patches on %s", str(img_patch.shape))
                    label_p_pred = model.predict(img_patch, verbose=0)
                    seg = np.argmax(label_p_pred, axis=3)

                    if thresholding_for_some_classes_in_light_version:
                        seg_art = label_p_pred[:,:,:,4]
                        seg_art[seg_art<0.2] = 0
                        seg_art[seg_art>0] = 1

                        seg_line = label_p_pred[:,:,:,3]
                        seg_line[seg_line>0.5] = 1 #seg_line[seg_line>0.1] = 1
                        seg_line[seg_line<1] = 0

                        seg[seg_art==1] = 4
                        seg[(seg_line==1) & (seg==0)] = 3
                    if thresholding_for_artificial_class_in_light_version:
                        seg_art = label_p_pred[:,:,:,2]
                        seg_art[seg_art<0.2] = 0
                        seg_art[seg_art>0] = 1
                        seg[seg_art==1] = 2

                    indexer_inside_batch = 0
                    for i_batch, j_batch in zip(list_i_s, list_j_s):
                        seg_in = seg[indexer_inside_batch]
                        # class-1 (text) probability of the current batch
                        # element, used as per-pixel confidence
                        conf_in = label_p_pred[indexer_inside_batch, :, :, 1]

                        index_y_u_in = list_y_u[indexer_inside_batch]
                        index_y_d_in = list_y_d[indexer_inside_batch]
                        index_x_u_in = list_x_u[indexer_inside_batch]
                        index_x_d_in = list_x_d[indexer_inside_batch]

                        if i_batch == 0 and j_batch == 0:
                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                            index_x_d_in + 0:index_x_u_in - margin] = \
                                seg_in[0:-margin or None, 0:-margin or None, np.newaxis]
                            confidence_matrix[index_y_d_in + 0:index_y_u_in - margin,
                                              index_x_d_in + 0:index_x_u_in - margin] = \
                                conf_in[0:-margin or None, 0:-margin or None]
                        elif i_batch == nxf - 1 and j_batch == nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                            index_x_d_in + margin:index_x_u_in - 0] = \
                                seg_in[margin:, margin:, np.newaxis]
                            confidence_matrix[index_y_d_in + margin:index_y_u_in - 0,
                                              index_x_d_in + margin:index_x_u_in - 0] = \
                                conf_in[margin:, margin:]
                        elif i_batch == 0 and j_batch == nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                            index_x_d_in + 0:index_x_u_in - margin] = \
                                seg_in[margin:, 0:-margin or None, np.newaxis]
                            confidence_matrix[index_y_d_in + margin:index_y_u_in - 0,
                                              index_x_d_in + 0:index_x_u_in - margin] = \
                                conf_in[margin:, 0:-margin or None]
                        elif i_batch == nxf - 1 and j_batch == 0:
                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - 0] = \
                                seg_in[0:-margin or None, margin:, np.newaxis]
                            confidence_matrix[index_y_d_in + 0:index_y_u_in - margin,
                                              index_x_d_in + margin:index_x_u_in - 0] = \
                                conf_in[0:-margin or None, margin:]
                        elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                            index_x_d_in + 0:index_x_u_in - margin] = \
                                seg_in[margin:-margin or None, 0:-margin or None, np.newaxis]
                            confidence_matrix[index_y_d_in + margin:index_y_u_in - margin,
                                              index_x_d_in + 0:index_x_u_in - margin] = \
                                conf_in[margin:-margin or None, 0:-margin or None]
                        elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - 0] = \
                                seg_in[margin:-margin or None, margin:, np.newaxis]
                            confidence_matrix[index_y_d_in + margin:index_y_u_in - margin,
                                              index_x_d_in + margin:index_x_u_in - 0] = \
                                conf_in[margin:-margin or None, margin:]
                        elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0:
                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - margin] = \
                                seg_in[0:-margin or None, margin:-margin or None, np.newaxis]
                            confidence_matrix[index_y_d_in + 0:index_y_u_in - margin,
                                              index_x_d_in + margin:index_x_u_in - margin] = \
                                conf_in[0:-margin or None, margin:-margin or None]
                        elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                            index_x_d_in + margin:index_x_u_in - margin] = \
                                seg_in[margin:, margin:-margin or None, np.newaxis]
                            confidence_matrix[index_y_d_in + margin:index_y_u_in - 0,
                                              index_x_d_in + margin:index_x_u_in - margin] = \
                                conf_in[margin:, margin:-margin or None]
                        else:
                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - margin] = \
                                seg_in[margin:-margin or None, margin:-margin or None, np.newaxis]
                            confidence_matrix[index_y_d_in + margin:index_y_u_in - margin,
                                              index_x_d_in + margin:index_x_u_in - margin] = \
                                conf_in[margin:-margin or None, margin:-margin or None]
                        indexer_inside_batch += 1

                    list_i_s = []
                    list_j_s = []
                    list_x_u = []
                    list_x_d = []
                    list_y_u = []
                    list_y_d = []

                    batch_indexer = 0
                    img_patch[:] = 0

        prediction_true = prediction_true.astype(np.uint8)
        gc.collect()
        return prediction_true, confidence_matrix

    def extract_page(self):
        self.logger.debug("enter extract_page")
        cont_page = []
        if not self.ignore_page_extraction:
            img = cv2.GaussianBlur(self.image, (5, 5), 0)
            img_page_prediction = self.do_prediction(False, img, self.model_page)
            imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
            _, thresh = cv2.threshold(imgray, 0, 255, 0)
            thresh = cv2.dilate(thresh, KERNEL, iterations=3)
            contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            if len(contours) > 0:
                cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))])
                cnt = contours[np.argmax(cnt_size)]
                x, y, w, h = cv2.boundingRect(cnt)
                # snap the page box to the image border if it comes close
                if x <= 30:
                    w += x
                    x = 0
                if (self.image.shape[1] - (x + w)) <= 30:
                    w = w + (self.image.shape[1] - (x + w))
                if y <= 30:
                    h = h + y
                    y = 0
                if (self.image.shape[0] - (y + h)) <= 30:
                    h = h + (self.image.shape[0] - (y + h))
                box = [x, y, w, h]
            else:
                box = [0, 0, img.shape[1], img.shape[0]]
            cropped_page, page_coord = crop_image_inside_box(box, self.image)
            cont_page.append(np.array([[page_coord[2], page_coord[0]],
                                       [page_coord[3], page_coord[0]],
                                       [page_coord[3], page_coord[1]],
                                       [page_coord[2], page_coord[1]]]))
            self.logger.debug("exit extract_page")
        else:
            box = [0, 0, self.image.shape[1], self.image.shape[0]]
            cropped_page, page_coord = crop_image_inside_box(box, self.image)
            cont_page.append(np.array([[page_coord[2], page_coord[0]],
                                       [page_coord[3], page_coord[0]],
                                       [page_coord[3], page_coord[1]],
                                       [page_coord[2], page_coord[1]]]))
        return cropped_page, page_coord, cont_page
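    # early_page_for_num_of_column_classification() below is a cheaper
    # variant of extract_page(): it runs the same page model, but only to
    # obtain a crop for the column classifier, so it skips the border
    # snapping above.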
self.logger.debug("enter early_page_for_num_of_column_classification") if self.input_binary: img = np.copy(img_bin).astype(np.uint8) else: img = self.imread() img = cv2.GaussianBlur(img, (5, 5), 0) img_page_prediction = self.do_prediction(False, img, self.model_page) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) thresh = cv2.dilate(thresh, KERNEL, iterations=3) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) if len(contours)>0: cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) cnt = contours[np.argmax(cnt_size)] box = cv2.boundingRect(cnt) else: box = [0, 0, img.shape[1], img.shape[0]] cropped_page, page_coord = crop_image_inside_box(box, img) self.logger.debug("exit early_page_for_num_of_column_classification") else: img = self.imread() box = [0, 0, img.shape[1], img.shape[0]] cropped_page, page_coord = crop_image_inside_box(box, img) return cropped_page, page_coord def extract_text_regions_new(self, img, patches, cols): self.logger.debug("enter extract_text_regions") img_height_h = img.shape[0] img_width_h = img.shape[1] model_region = self.model_region_fl if patches else self.model_region_fl_np if self.light_version: pass elif not patches: img = otsu_copy_binary(img).astype(np.uint8) prediction_regions = None elif cols: img = otsu_copy_binary(img).astype(np.uint8) if cols == 1: img = resize_image(img, int(img_height_h * 1000 / float(img_width_h)), 1000).astype(np.uint8) elif cols == 2: img = resize_image(img, int(img_height_h * 1300 / float(img_width_h)), 1300).astype(np.uint8) elif cols == 3: img = resize_image(img, int(img_height_h * 1600 / float(img_width_h)), 1600).astype(np.uint8) elif cols == 4: img = resize_image(img, int(img_height_h * 1900 / float(img_width_h)), 1900).astype(np.uint8) elif cols == 5: img = resize_image(img, int(img_height_h * 2200 / float(img_width_h)), 2200).astype(np.uint8) else: img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500).astype(np.uint8) prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1, n_batch_inference=3) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions def extract_text_regions(self, img, patches, cols): self.logger.debug("enter extract_text_regions") img_height_h = img.shape[0] img_width_h = img.shape[1] model_region = self.model_region_fl if patches else self.model_region_fl_np if not patches: img = otsu_copy_binary(img) img = img.astype(np.uint8) prediction_regions2 = None elif cols: if cols == 1: img_height_new = int(img_height_h * 0.7) img_width_new = int(img_width_h * 0.7) elif cols == 2: img_height_new = int(img_height_h * 0.4) img_width_new = int(img_width_h * 0.4) else: img_height_new = int(img_height_h * 0.3) img_width_new = int(img_width_h * 0.3) img2 = otsu_copy_binary(img) img2 = img2.astype(np.uint8) img2 = resize_image(img2, img_height_new, img_width_new) prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=0.1) prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) img = otsu_copy_binary(img).astype(np.uint8) if cols == 1: img = resize_image(img, int(img_height_h * 0.5), int(img_width_h * 0.5)).astype(np.uint8) elif cols == 2 and img_width_h >= 2000: img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8) elif 
    def extract_text_regions(self, img, patches, cols):
        self.logger.debug("enter extract_text_regions")
        img_height_h = img.shape[0]
        img_width_h = img.shape[1]
        model_region = self.model_region_fl if patches else self.model_region_fl_np

        if not patches:
            img = otsu_copy_binary(img)
            img = img.astype(np.uint8)
            prediction_regions2 = None
        elif cols:
            if cols == 1:
                img_height_new = int(img_height_h * 0.7)
                img_width_new = int(img_width_h * 0.7)
            elif cols == 2:
                img_height_new = int(img_height_h * 0.4)
                img_width_new = int(img_width_h * 0.4)
            else:
                img_height_new = int(img_height_h * 0.3)
                img_width_new = int(img_width_h * 0.3)
            img2 = otsu_copy_binary(img)
            img2 = img2.astype(np.uint8)
            img2 = resize_image(img2, img_height_new, img_width_new)
            prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=0.1)
            prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h)

            img = otsu_copy_binary(img).astype(np.uint8)
            if cols == 1:
                img = resize_image(img, int(img_height_h * 0.5), int(img_width_h * 0.5)).astype(np.uint8)
            elif cols == 2 and img_width_h >= 2000:
                img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8)
            elif cols == 3 and ((self.scale_x == 1 and img_width_h > 3000) or
                                (self.scale_x != 1 and img_width_h > 2800)):
                img = resize_image(img, 2800 * img_height_h // img_width_h, 2800).astype(np.uint8)
            elif cols == 4 and ((self.scale_x == 1 and img_width_h > 4000) or
                                (self.scale_x != 1 and img_width_h > 3700)):
                img = resize_image(img, 3700 * img_height_h // img_width_h, 3700).astype(np.uint8)
            elif cols == 4:
                img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8)
            elif cols == 5 and self.scale_x == 1 and img_width_h > 5000:
                img = resize_image(img, int(img_height_h * 0.7), int(img_width_h * 0.7)).astype(np.uint8)
            elif cols == 5:
                img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8)
            elif img_width_h > 5600:
                img = resize_image(img, 5600 * img_height_h // img_width_h, 5600).astype(np.uint8)
            else:
                img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8)

        prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1)
        prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h)
        self.logger.debug("exit extract_text_regions")
        return prediction_regions, prediction_regions2

    def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot,
                                         image_page_rotated, boxes, slope_deskew):
        polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot, 1, 0.00001)
        M_main_tot = [cv2.moments(polygons_of_textlines[j])
                      for j in range(len(polygons_of_textlines))]
        cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))]
        cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))]

        args_textlines = np.array(range(len(polygons_of_textlines)))
        all_found_textline_polygons = []
        slopes = []
        all_box_coord = []
        for index, con_region_ind in enumerate(contours_par):
            results = [cv2.pointPolygonTest(con_region_ind, (cx_main_tot[ind], cy_main_tot[ind]), False)
                       for ind in args_textlines]
            results = np.array(results)
            indexes_in = args_textlines[results == 1]
            textlines_ins = [polygons_of_textlines[ind] for ind in indexes_in]

            all_found_textline_polygons.append(textlines_ins[::-1])
            slopes.append(slope_deskew)

            _, crop_coor = crop_image_inside_box(boxes[index], image_page_rotated)
            all_box_coord.append(crop_coor)
        return (all_found_textline_polygons, boxes, contours, contours_par,
                all_box_coord, np.array(range(len(contours_par))), slopes)

    def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot,
                                        image_page_rotated, boxes, slope_deskew):
        if not len(contours):
            return [], [], [], [], [], [], []
        self.logger.debug("enter get_slopes_and_deskew_new_light")
        results = self.executor.map(partial(do_work_of_slopes_new_light,
                                            textline_mask_tot_ea=textline_mask_tot,
                                            image_page_rotated=image_page_rotated,
                                            slope_deskew=slope_deskew,
                                            textline_light=self.textline_light,
                                            logger=self.logger,),
                                    boxes, contours, contours_par, range(len(contours_par)))
        #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results)
        self.logger.debug("exit get_slopes_and_deskew_new_light")
        return tuple(zip(*results))
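    # The slope workers above and below all follow the same fan-out/fan-in
    # pattern: one call per region is submitted through self.executor.map(),
    # and the per-region 7-tuples are transposed into 7 parallel sequences.
    # In miniature:
    #
    #     results = [(1, 'a'), (2, 'b')]
    #     tuple(zip(*results))  # -> ((1, 2), ('a', 'b'))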
    def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot,
                                  image_page_rotated, boxes, slope_deskew):
        if not len(contours):
            return [], [], [], [], [], [], []
        self.logger.debug("enter get_slopes_and_deskew_new")
        results = self.executor.map(partial(do_work_of_slopes_new,
                                            textline_mask_tot_ea=textline_mask_tot,
                                            image_page_rotated=image_page_rotated,
                                            slope_deskew=slope_deskew,
                                            MAX_SLOPE=MAX_SLOPE,
                                            KERNEL=KERNEL,
                                            logger=self.logger,
                                            plotter=self.plotter,),
                                    boxes, contours, contours_par, range(len(contours_par)))
        #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results)
        self.logger.debug("exit get_slopes_and_deskew_new")
        return tuple(zip(*results))

    def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot,
                                         image_page_rotated, boxes, mask_texts_only,
                                         num_col, scale_par, slope_deskew):
        if not len(contours):
            return [], [], [], [], [], [], []
        self.logger.debug("enter get_slopes_and_deskew_new_curved")
        results = self.executor.map(partial(do_work_of_slopes_new_curved,
                                            textline_mask_tot_ea=textline_mask_tot,
                                            image_page_rotated=image_page_rotated,
                                            mask_texts_only=mask_texts_only,
                                            num_col=num_col,
                                            scale_par=scale_par,
                                            slope_deskew=slope_deskew,
                                            MAX_SLOPE=MAX_SLOPE,
                                            KERNEL=KERNEL,
                                            logger=self.logger,
                                            plotter=self.plotter,),
                                    boxes, contours, contours_par, range(len(contours_par)))
        #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results)
        self.logger.debug("exit get_slopes_and_deskew_new_curved")
        return tuple(zip(*results))

    def textline_contours(self, img, use_patches, scaler_h, scaler_w, num_col_classifier=None):
        self.logger.debug('enter textline_contours')
        #img = img.astype(np.uint8)
        img_org = np.copy(img)
        img_h = img_org.shape[0]
        img_w = img_org.shape[1]
        img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w))

        prediction_textline = self.do_prediction(
            use_patches, img, self.model_textline,
            marginal_of_patch_percent=0.15, n_batch_inference=3,
            thresholding_for_artificial_class_in_light_version=self.textline_light)
        #if not self.textline_light:
        #if num_col_classifier == 1:
        #prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline)
        #prediction_textline[:,:][prediction_textline_nopatch[:,:] == 0] = 0

        prediction_textline = resize_image(prediction_textline, img_h, img_w)
        textline_mask_tot_ea_art = (prediction_textline[:,:] == 2) * 1

        old_art = np.copy(textline_mask_tot_ea_art)
        if not self.textline_light:
            textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8')
            #textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1)
            prediction_textline[:,:][textline_mask_tot_ea_art[:,:] == 1] = 2

        textline_mask_tot_ea_lines = (prediction_textline[:,:] == 1) * 1
        textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8')

        if not self.textline_light:
            textline_mask_tot_ea_lines = cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1)

        prediction_textline[:,:][textline_mask_tot_ea_lines[:,:] == 1] = 1
        if not self.textline_light:
            prediction_textline[:,:][old_art[:,:] == 1] = 2

        prediction_textline_longshot = self.do_prediction(False, img, self.model_textline)
        prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w)

        self.logger.debug('exit textline_contours')
        return ((prediction_textline[:, :, 0] == 1).astype(np.uint8),
                (prediction_textline_longshot_true_size[:, :, 0] == 1).astype(np.uint8))
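    # do_work_of_slopes() below estimates one deskew angle per region: the
    # mean vertical distance between text-line contours sets the smoothing
    # sigma for the projection profile (e.g. a mean line distance of 40 px
    # gives sigma_des = max(1, int(40 * 4.0 / 40.0)) = 4) before
    # return_deskew_slop() scores candidate rotations.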
    def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process):
        self.logger.debug('enter do_work_of_slopes')
        slope_biggest = 0
        slopes_sub = []
        boxes_sub_new = []
        poly_sub = []
        for mv in range(len(boxes_per_process)):
            crop_img, _ = crop_image_inside_box(boxes_per_process[mv],
                                                np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2))
            crop_img = crop_img[:, :, 0]
            crop_img = cv2.erode(crop_img, KERNEL, iterations=2)
            try:
                textline_con, hierarchy = return_contours_of_image(crop_img)
                textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierarchy,
                                                                 max_area=1, min_area=0.0008)
                y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
                sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0)))
                crop_img[crop_img > 0] = 1
                slope_corresponding_textregion = return_deskew_slop(crop_img, sigma_des,
                                                                    map=self.executor.map,
                                                                    logger=self.logger,
                                                                    plotter=self.plotter)
            except Exception as why:
                self.logger.error(why)
                slope_corresponding_textregion = MAX_SLOPE

            if slope_corresponding_textregion == MAX_SLOPE:
                slope_corresponding_textregion = slope_biggest
            slopes_sub.append(slope_corresponding_textregion)

            cnt_clean_rot = textline_contours_postprocessing(
                crop_img, slope_corresponding_textregion, contours_per_process[mv], boxes_per_process[mv])

            poly_sub.append(cnt_clean_rot)
            boxes_sub_new.append(boxes_per_process[mv])

        q.put(slopes_sub)
        poly.put(poly_sub)
        box_sub.put(boxes_sub_new)
        self.logger.debug('exit do_work_of_slopes')
###image_boundary_of_doc[text_regions_p_true.shape[0]-6:text_regions_p_true.shape[0], :] = 1 ###image_boundary_of_doc[:, :6] = 1 ###image_boundary_of_doc[:, text_regions_p_true.shape[1]-6:text_regions_p_true.shape[1]] = 1 polygons_of_images_fin = [] for ploy_img_ind in polygons_of_images: """ test_poly_image = np.zeros((text_regions_p_true.shape[0], text_regions_p_true.shape[1])) test_poly_image = cv2.fillPoly(test_poly_image, pts=[ploy_img_ind], color=(1,1,1)) test_poly_image = test_poly_image + image_boundary_of_doc test_poly_image_intersected_area = ( test_poly_image[:,:]==2 )*1 test_poly_image_intersected_area = test_poly_image_intersected_area.sum() if test_poly_image_intersected_area==0: ##polygons_of_images_fin.append(ploy_img_ind) box = cv2.boundingRect(ploy_img_ind) _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) # cont_page.append(np.array([[page_coord[2], page_coord[0]], # [page_coord[3], page_coord[0]], # [page_coord[3], page_coord[1]], # [page_coord[2], page_coord[1]]])) polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) ) """ box = x, y, w, h = cv2.boundingRect(ploy_img_ind) if h < 150 or w < 150: pass else: _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) # cont_page.append(np.array([[page_coord[2], page_coord[0]], # [page_coord[3], page_coord[0]], # [page_coord[3], page_coord[1]], # [page_coord[2], page_coord[1]]])) polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]])) self.logger.debug("exit get_regions_extract_images_only") return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False): self.logger.debug("enter get_regions_light_v") t_in = time.time() erosion_hurts = False img_org = np.copy(img) img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: img_w_new = 1000 elif num_col_classifier == 2: img_w_new = 1500#1500 elif num_col_classifier == 3: img_w_new = 2000 elif num_col_classifier == 4: img_w_new = 2500 elif num_col_classifier == 5: img_w_new = 3000 else: img_w_new = 4000 img_h_new = img_w_new * img_org.shape[0] // img_org.shape[1] img_resized = resize_image(img,img_h_new, img_w_new ) t_bin = time.time() #if (not self.input_binary) or self.full_layout: #if self.input_binary: #img_bin = np.copy(img_resized) ###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30): ###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) ####print("inside bin ", time.time()-t_bin) ###prediction_bin=prediction_bin[:,:,0] ###prediction_bin = (prediction_bin[:,:]==0)*1 ###prediction_bin = prediction_bin*255 ###prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) ###prediction_bin = prediction_bin.astype(np.uint16) ####img= np.copy(prediction_bin) ###img_bin = np.copy(prediction_bin) ###else: ###img_bin = np.copy(img_resized) if self.ocr and not self.input_binary: prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) prediction_bin = 255 * (prediction_bin[:,:,0] == 0) 
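# --- Illustrative sketch (illustration only; not called anywhere in this module) ---
# The image-extraction loop above keeps only detections whose bounding box is at
# least 150 px on each side and stores them as axis-aligned four-point polygons.
# Distilled (assuming the box coordinates carry over unchanged, as
# crop_image_inside_box does for an in-page box):
#
#     import numpy as np
#     import cv2
#
#     def rect_polygons(contours, min_side=150):
#         polys = []
#         for cnt in contours:
#             x, y, w, h = cv2.boundingRect(cnt)
#             if w < min_side or h < min_side:
#                 continue  # drop small detections
#             polys.append(np.array([[x, y], [x + w, y],
#                                    [x + w, y + h], [x, y + h]]))
#         return polys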
prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) prediction_bin = prediction_bin.astype(np.uint16) #img= np.copy(prediction_bin) img_bin = np.copy(prediction_bin) else: img_bin = np.copy(img_resized) #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) self.logger.debug("detecting textlines on %s with %d colors", str(img_resized.shape), len(np.unique(img_resized))) textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) #print(self.image_org.shape) #cv2.imwrite('out_13.png', self.image_page_org_size) #plt.imshwo(self.image_page_org_size) #plt.show() if not skip_layout_and_reading_order: #print("inside 2 ", time.time()-t_in) if num_col_classifier == 1 or num_col_classifier == 2: if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: self.logger.debug("resized to %dx%d for %d cols", img_resized.shape[1], img_resized.shape[0], num_col_classifier) prediction_regions_org, confidence_matrix = self.do_prediction_new_concept( True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_some_classes_in_light_version=True) else: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) confidence_matrix = np.zeros((self.image_org.shape[0], self.image_org.shape[1])) prediction_regions_page, confidence_matrix_page = self.do_prediction_new_concept( False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) ys = slice(*self.page_coord[0:2]) xs = slice(*self.page_coord[2:4]) prediction_regions_org[ys, xs] = prediction_regions_page confidence_matrix[ys, xs] = confidence_matrix_page else: new_h = (900+ (num_col_classifier-3)*100) img_resized = resize_image(img_bin, int(new_h * img_bin.shape[0] /img_bin.shape[1]), new_h) self.logger.debug("resized to %dx%d (new_h=%d) for %d cols", img_resized.shape[1], img_resized.shape[0], new_h, num_col_classifier) prediction_regions_org, confidence_matrix = self.do_prediction_new_concept( True, img_resized, self.model_region_1_2, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) confidence_matrix = resize_image(confidence_matrix, img_height_h, img_width_h ) img_bin = resize_image(img_bin, img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] mask_lines_only = (prediction_regions_org[:,:] ==3)*1 mask_texts_only = (prediction_regions_org[:,:] ==1)*1 mask_texts_only = mask_texts_only.astype('uint8') ##if num_col_classifier == 1 or num_col_classifier == 2: ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) mask_images_only=(prediction_regions_org[:,:] ==2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) test_khat = np.zeros(prediction_regions_org.shape) test_khat = cv2.fillPoly(test_khat, pts=polygons_lines_xml, color=(1,1,1)) #plt.imshow(test_khat[:,:]) #plt.show() #for jv in range(1): #print(jv, 
hir_lines_xml[0][232][3]) #test_khat = np.zeros(prediction_regions_org.shape) #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) #plt.imshow(test_khat[:,:]) #plt.show() polygons_lines_xml = filter_contours_area_of_image( mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) test_khat = np.zeros(prediction_regions_org.shape) test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) #plt.imshow(test_khat[:,:]) #plt.show() #sys.exit() polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) ##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3,3,3)) text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) #plt.imshow(textline_mask_tot_ea) #plt.show() textline_mask_tot_ea[(text_regions_p_true==0) | (text_regions_p_true==4) ] = 0 #plt.imshow(textline_mask_tot_ea) #plt.show() #print("inside 4 ", time.time()-t_in) self.logger.debug("exit get_regions_light_v") return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin, confidence_matrix else: img_bin = resize_image(img_bin,img_height_h, img_width_h ) self.logger.debug("exit get_regions_light_v") return None, erosion_hurts, None, textline_mask_tot_ea, img_bin, None def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_from_xy_2models") erosion_hurts = False img_org = np.copy(img) img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] ratio_y=1.3 ratio_x=1 img = resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) prediction_regions_org_y = self.do_prediction(True, img, self.model_region) prediction_regions_org_y = resize_image(prediction_regions_org_y, img_height_h, img_width_h ) #plt.imshow(prediction_regions_org_y[:,:,0]) #plt.show() prediction_regions_org_y = prediction_regions_org_y[:,:,0] mask_zeros_y = (prediction_regions_org_y[:,:]==0)*1 ##img_only_regions_with_sep = ( (prediction_regions_org_y[:,:] != 3) & (prediction_regions_org_y[:,:] != 0) )*1 img_only_regions_with_sep = (prediction_regions_org_y == 1).astype(np.uint8) try: img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=20) _, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1))) prediction_regions_org = self.do_prediction(True, img, self.model_region) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] prediction_regions_org[(prediction_regions_org[:,:]==1) & (mask_zeros_y[:,:]==1)]=0 img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1])) prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, marginal_of_patch_percent=0.2) prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) mask_zeros2 = (prediction_regions_org2[:,:,0] == 0) mask_lines2 = (prediction_regions_org2[:,:,0] == 3) text_sume_early = (prediction_regions_org[:,:] == 1).sum() 
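# --- Illustrative sketch (illustration only; not called anywhere in this module) ---
# filter_contours_area_of_image() is called above with min_area/max_area given
# as fractions of the page area. Ignoring its hierarchy handling, the core
# criterion amounts to:
#
#     import cv2
#
#     def filter_by_relative_area(contours, img_shape, min_area=0.00001, max_area=1.0):
#         page_area = float(img_shape[0] * img_shape[1])
#         return [c for c in contours
#                 if min_area * page_area <= cv2.contourArea(c) <= max_area * page_area]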
prediction_regions_org_copy = np.copy(prediction_regions_org) prediction_regions_org_copy[(prediction_regions_org_copy[:,:]==1) & (mask_zeros2[:,:]==1)] = 0 text_sume_second = ((prediction_regions_org_copy[:,:]==1)*1).sum() rate_two_models = 100. * text_sume_second / text_sume_early self.logger.info("ratio_of_two_models: %s", rate_two_models) if not(is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD): prediction_regions_org = np.copy(prediction_regions_org_copy) prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3 mask_lines_only=(prediction_regions_org[:,:]==3)*1 prediction_regions_org = cv2.erode(prediction_regions_org[:,:], KERNEL, iterations=2) prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2) if rate_two_models<=40: if self.input_binary: prediction_bin = np.copy(img_org) else: prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin = 255 * (prediction_bin[:,:,0]==0) prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) ratio_y=1 ratio_x=1 img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) prediction_regions_org = self.do_prediction(True, img, self.model_region) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] mask_lines_only=(prediction_regions_org[:,:]==3)*1 mask_texts_only=(prediction_regions_org[:,:]==1)*1 mask_images_only=(prediction_regions_org[:,:]==2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) polygons_lines_xml = filter_contours_area_of_image( mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) text_regions_p_true = cv2.fillPoly(text_regions_p_true,pts = polygons_of_only_lines, color=(3, 3, 3)) text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1)) self.logger.debug("exit get_regions_from_xy_2models") return text_regions_p_true, erosion_hurts, polygons_lines_xml except: if self.input_binary: prediction_bin = np.copy(img_org) prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin = 255 * (prediction_bin[:,:,0]==0) prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) else: prediction_bin = np.copy(img_org) ratio_y=1 ratio_x=1 img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) prediction_regions_org = self.do_prediction(True, img, self.model_region) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] #mask_lines_only=(prediction_regions_org[:,:]==3)*1 #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1)) #prediction_regions_org = self.do_prediction(True, img, self.model_region) #prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) #prediction_regions_org = prediction_regions_org[:,:,0] 
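# --- Illustrative sketch (illustration only; not called anywhere in this module) ---
# rate_two_models above measures how many of the first model's text pixels
# survive after masking with the second model's background: near 100 means the
# two models agree. On an enhanced image, a ratio below
# RATIO_OF_TWO_MODEL_THRESHOLD (95.5) makes the pipeline keep the first model's
# unmasked prediction, and at or below 40 the page is re-run on a binarized
# input. The ratio itself, for NumPy label maps:
#
#     def text_retention_percent(pred_a, pred_b, text_label=1):
#         before = (pred_a == text_label).sum()
#         after = ((pred_a == text_label) & (pred_b != 0)).sum()
#         return 100.0 * after / max(before, 1)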
#prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0 mask_lines_only = (prediction_regions_org == 3)*1 mask_texts_only = (prediction_regions_org == 1)*1 mask_images_only= (prediction_regions_org == 2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) polygons_lines_xml = filter_contours_area_of_image( mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) erosion_hurts = True self.logger.debug("exit get_regions_from_xy_2models") return text_regions_p_true, erosion_hurts, polygons_lines_xml def do_order_of_regions_full_layout( self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): self.logger.debug("enter do_order_of_regions_full_layout") boxes = np.array(boxes, dtype=int) # to be on the safe side cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours( contours_only_text_parent) cx_text_only_h, cy_text_only_h, x_min_text_only_h, _, _, _, y_cor_x_min_main_h = find_new_features_of_contours( contours_only_text_parent_h) try: arg_text_con = [] for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): if (x_min_text_only[ii] + 80 >= boxes[jj][0] and x_min_text_only[ii] + 80 < boxes[jj][1] and y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]): arg_text_con.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) arg_text_con_h = [] for ii in range(len(cx_text_only_h)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): if (x_min_text_only_h[ii] + 80 >= boxes[jj][0] and x_min_text_only_h[ii] + 80 < boxes[jj][1] and y_cor_x_min_main_h[ii] >= boxes[jj][2] and y_cor_x_min_main_h[ii] < boxes[jj][3]): arg_text_con_h.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + (cy_text_only_h[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con_h.append(ind_min) args_contours_h = np.array(range(len(arg_text_con_h))) order_by_con_head = np.zeros(len(arg_text_con_h)) order_by_con_main = np.zeros(len(arg_text_con)) ref_point = 0 order_of_texts_tot = [] id_of_texts_tot = [] for iij in range(len(boxes)): ys = slice(*boxes[iij][2:4]) xs = slice(*boxes[iij][0:2]) args_contours_box = args_contours[np.array(arg_text_con) == iij] args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij] con_inter_box = [] con_inter_box_h = [] for box in args_contours_box: con_inter_box.append(contours_only_text_parent[box]) for box in args_contours_box_h: 
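# --- Illustrative sketch (illustration only; not called anywhere in this module) ---
# The box-assignment loops above attach each text region to one reading-order
# box: first by containment (probing with x_min + 80 and the y at x_min), then,
# if no box contains the probe point, by the nearest box corner. The same
# two-step rule in isolation (boxes are [x_start, x_end, y_start, y_end]):
#
#     import math
#
#     def assign_to_box(px, py, boxes):
#         for jj, (x1, x2, y1, y2) in enumerate(boxes):
#             if x1 <= px < x2 and y1 <= py < y2:
#                 return jj
#         return min(range(len(boxes)),
#                    key=lambda jj: math.hypot(px - boxes[jj][1],
#                                              py - boxes[jj][2]))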
con_inter_box_h.append(contours_only_text_parent_h[box]) indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) order_of_texts, id_of_texts = order_and_id_of_texts( con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_sorted_head = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 2] indexes_by_type_head = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 2] for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for zahler, _ in enumerate(args_contours_box_h): arg_order_v = indexes_sorted_head[zahler] order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \ np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji in range(len(id_of_texts)): order_of_texts_tot.append(order_of_texts[jji] + ref_point) id_of_texts_tot.append(id_of_texts[jji]) ref_point += len(id_of_texts) order_of_texts_tot = [] for tj1 in range(len(contours_only_text_parent)): order_of_texts_tot.append(int(order_by_con_main[tj1])) for tj1 in range(len(contours_only_text_parent_h)): order_of_texts_tot.append(int(order_by_con_head[tj1])) order_text_new = [] for iii in range(len(order_of_texts_tot)): order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) except Exception as why: self.logger.error(why) arg_text_con = [] for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): if (cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]): # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) order_by_con_main = np.zeros(len(arg_text_con)) ############################# head arg_text_con_h = [] for ii in range(len(cx_text_only_h)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): if (cx_text_only_h[ii] >= boxes[jj][0] and cx_text_only_h[ii] < boxes[jj][1] and cy_text_only_h[ii] >= boxes[jj][2] and cy_text_only_h[ii] < boxes[jj][3]): # this is valid if the center of region identify in which box it is located arg_text_con_h.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + (cy_text_only_h[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con_h.append(ind_min) args_contours_h = np.array(range(len(arg_text_con_h))) order_by_con_head = np.zeros(len(arg_text_con_h)) ref_point = 0 order_of_texts_tot = [] id_of_texts_tot = [] for iij, _ in enumerate(boxes): ys = slice(*boxes[iij][2:4]) xs = slice(*boxes[iij][0:2]) 
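# --- Illustrative sketch (illustration only; not called anywhere in this module) ---
# The np.where(...)[0][0] loop above inverts a permutation: order_of_texts_tot
# says at which reading position each region ends up, and order_text_new lists
# which region is read at each position. For a valid permutation this is just
# argsort:
#
#     import numpy as np
#
#     ranks = np.array([2, 0, 1])        # region i is read at position ranks[i]
#     reading_order = np.argsort(ranks)  # region read at each position
#     assert reading_order.tolist() == [1, 2, 0]
#     assert reading_order.tolist() == [int(np.where(ranks == p)[0][0])
#                                       for p in range(len(ranks))]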
args_contours_box = args_contours[np.array(arg_text_con) == iij] args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij] con_inter_box = [] con_inter_box_h = [] for box in args_contours_box: con_inter_box.append(contours_only_text_parent[box]) for box in args_contours_box_h: con_inter_box_h.append(contours_only_text_parent_h[box]) indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) order_of_texts, id_of_texts = order_and_id_of_texts( con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_sorted_head = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 2] indexes_by_type_head = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 2] for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for zahler, _ in enumerate(args_contours_box_h): arg_order_v = indexes_sorted_head[zahler] order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \ np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji, _ in enumerate(id_of_texts): order_of_texts_tot.append(order_of_texts[jji] + ref_point) id_of_texts_tot.append(id_of_texts[jji]) ref_point += len(id_of_texts) order_of_texts_tot = [] for tj1 in range(len(contours_only_text_parent)): order_of_texts_tot.append(int(order_by_con_main[tj1])) for tj1 in range(len(contours_only_text_parent_h)): order_of_texts_tot.append(int(order_by_con_head[tj1])) order_text_new = [] for iii in range(len(order_of_texts_tot)): order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) self.logger.debug("exit do_order_of_regions_full_layout") return order_text_new, id_of_texts_tot def do_order_of_regions_no_full_layout( self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): self.logger.debug("enter do_order_of_regions_no_full_layout") boxes = np.array(boxes, dtype=int) # to be on the safe side cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours( contours_only_text_parent) try: arg_text_con = [] for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): if (x_min_text_only[ii] + 80 >= boxes[jj][0] and x_min_text_only[ii] + 80 < boxes[jj][1] and y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]): arg_text_con.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) order_by_con_main = np.zeros(len(arg_text_con)) ref_point = 0 order_of_texts_tot = [] id_of_texts_tot = [] for iij in range(len(boxes)): ys = slice(*boxes[iij][2:4]) xs = slice(*boxes[iij][0:2]) args_contours_box = args_contours[np.array(arg_text_con) == iij] con_inter_box = [] con_inter_box_h = [] for i in range(len(args_contours_box)): 
con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) order_of_texts, id_of_texts = order_and_id_of_texts( con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji, _ in enumerate(id_of_texts): order_of_texts_tot.append(order_of_texts[jji] + ref_point) id_of_texts_tot.append(id_of_texts[jji]) ref_point += len(id_of_texts) order_of_texts_tot = [] for tj1 in range(len(contours_only_text_parent)): order_of_texts_tot.append(int(order_by_con_main[tj1])) order_text_new = [] for iii in range(len(order_of_texts_tot)): order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) except Exception as why: self.logger.error(why) arg_text_con = [] for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): if (cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]): # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))] ind_min = np.argmin(dists_tr_from_box) arg_text_con.append(ind_min) args_contours = np.array(range(len(arg_text_con))) order_by_con_main = np.zeros(len(arg_text_con)) ref_point = 0 order_of_texts_tot = [] id_of_texts_tot = [] for iij in range(len(boxes)): ys = slice(*boxes[iij][2:4]) xs = slice(*boxes[iij][0:2]) args_contours_box = args_contours[np.array(arg_text_con) == iij] con_inter_box = [] con_inter_box_h = [] for i in range(len(args_contours_box)): con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) order_of_texts, id_of_texts = order_and_id_of_texts( con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji, _ in enumerate(id_of_texts): order_of_texts_tot.append(order_of_texts[jji] + ref_point) id_of_texts_tot.append(id_of_texts[jji]) ref_point += len(id_of_texts) order_of_texts_tot = [] for tj1 in range(len(contours_only_text_parent)): order_of_texts_tot.append(int(order_by_con_main[tj1])) order_text_new = [] for iii in range(len(order_of_texts_tot)): 
order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) self.logger.debug("exit do_order_of_regions_no_full_layout") return order_text_new, id_of_texts_tot def check_iou_of_bounding_box_and_contour_for_tables( self, layout, table_prediction_early, pixel_table, num_col_classifier): layout_org = np.copy(layout) layout_org[:,:,0][layout_org[:,:,0]==pixel_table] = 0 layout = (layout[:,:,0]==pixel_table)*1 layout =np.repeat(layout[:, :, np.newaxis], 3, axis=2) layout = layout.astype(np.uint8) imgray = cv2.cvtColor(layout, cv2.COLOR_BGR2GRAY ) _, thresh = cv2.threshold(imgray, 0, 255, 0) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) contours_new = [] for i in range(len(contours)): x, y, w, h = cv2.boundingRect(contours[i]) iou = cnt_size[i] /float(w*h) *100 if iou<80: layout_contour = np.zeros((layout_org.shape[0], layout_org.shape[1])) layout_contour= cv2.fillPoly(layout_contour,pts=[contours[i]] ,color=(1,1,1)) layout_contour_sum = layout_contour.sum(axis=0) layout_contour_sum_diff = np.diff(layout_contour_sum) layout_contour_sum_diff= np.abs(layout_contour_sum_diff) layout_contour_sum_diff_smoothed= gaussian_filter1d(layout_contour_sum_diff, 10) peaks, _ = find_peaks(layout_contour_sum_diff_smoothed, height=0) peaks= peaks[layout_contour_sum_diff_smoothed[peaks]>4] for j in range(len(peaks)): layout_contour[:,peaks[j]-3+1:peaks[j]+1+3] = 0 layout_contour=cv2.erode(layout_contour[:,:], KERNEL, iterations=5) layout_contour=cv2.dilate(layout_contour[:,:], KERNEL, iterations=5) layout_contour =np.repeat(layout_contour[:, :, np.newaxis], 3, axis=2) layout_contour = layout_contour.astype(np.uint8) imgray = cv2.cvtColor(layout_contour, cv2.COLOR_BGR2GRAY ) _, thresh = cv2.threshold(imgray, 0, 255, 0) contours_sep, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) for ji in range(len(contours_sep) ): contours_new.append(contours_sep[ji]) if num_col_classifier>=2: only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1])) only_recent_contour_image= cv2.fillPoly(only_recent_contour_image, pts=[contours_sep[ji]], color=(1,1,1)) table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early iou_in = 100. * table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum() #print(iou_in,'iou_in_in1') if iou_in>30: layout_org= cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,)) else: pass else: layout_org= cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,)) else: contours_new.append(contours[i]) if num_col_classifier>=2: only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1])) only_recent_contour_image= cv2.fillPoly(only_recent_contour_image,pts=[contours[i]] ,color=(1,1,1)) table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early iou_in = 100. 
* table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum() #print(iou_in,'iou_in') if iou_in>30: layout_org= cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,)) else: pass else: layout_org= cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,)) return layout_org, contours_new def delete_separator_around(self, spliter_y,peaks_neg,image_by_region, pixel_line, pixel_table): # format of subboxes: box=[x1, x2 , y1, y2] pix_del = 100 if len(image_by_region.shape)==3: for i in range(len(spliter_y)-1): for j in range(1,len(peaks_neg[i])-1): ys = slice(int(spliter_y[i]), int(spliter_y[i+1])) xs = slice(peaks_neg[i][j] - pix_del, peaks_neg[i][j] + pix_del) image_by_region[ys,xs,0][image_by_region[ys,xs,0]==pixel_line] = 0 image_by_region[ys,xs,0][image_by_region[ys,xs,1]==pixel_line] = 0 image_by_region[ys,xs,0][image_by_region[ys,xs,2]==pixel_line] = 0 image_by_region[ys,xs,0][image_by_region[ys,xs,0]==pixel_table] = 0 image_by_region[ys,xs,0][image_by_region[ys,xs,1]==pixel_table] = 0 image_by_region[ys,xs,0][image_by_region[ys,xs,2]==pixel_table] = 0 else: for i in range(len(spliter_y)-1): for j in range(1,len(peaks_neg[i])-1): ys = slice(int(spliter_y[i]), int(spliter_y[i+1])) xs = slice(peaks_neg[i][j] - pix_del, peaks_neg[i][j] + pix_del) image_by_region[ys,xs][image_by_region[ys,xs]==pixel_line] = 0 image_by_region[ys,xs][image_by_region[ys,xs]==pixel_table] = 0 return image_by_region def add_tables_heuristic_to_layout( self, image_regions_eraly_p, boxes, slope_mean_hor, spliter_y, peaks_neg_tot, image_revised, num_col_classifier, min_area, pixel_line): pixel_table =10 image_revised_1 = self.delete_separator_around(spliter_y, peaks_neg_tot, image_revised, pixel_line, pixel_table) try: image_revised_1[:,:30][image_revised_1[:,:30]==pixel_line] = 0 image_revised_1[:,-30:][image_revised_1[:,-30:]==pixel_line] = 0 except: pass boxes = np.array(boxes, dtype=int) # to be on the safe side img_comm_e = np.zeros(image_revised_1.shape) img_comm = np.repeat(img_comm_e[:, :, np.newaxis], 3, axis=2) for indiv in np.unique(image_revised_1): image_col=(image_revised_1==indiv)*255 img_comm_in=np.repeat(image_col[:, :, np.newaxis], 3, axis=2) img_comm_in=img_comm_in.astype(np.uint8) imgray = cv2.cvtColor(img_comm_in, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) contours,hirarchy=cv2.findContours(thresh.copy(), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) if indiv==pixel_table: main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area = 1, min_area = 0.001) else: main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area = 1, min_area = min_area) img_comm = cv2.fillPoly(img_comm, pts = main_contours, color = (indiv, indiv, indiv)) img_comm = img_comm.astype(np.uint8) if not self.isNaN(slope_mean_hor): image_revised_last = np.zeros((image_regions_eraly_p.shape[0], image_regions_eraly_p.shape[1],3)) for i in range(len(boxes)): box_ys = slice(*boxes[i][2:4]) box_xs = slice(*boxes[i][0:2]) image_box = img_comm[box_ys, box_xs] try: image_box_tabels_1=(image_box[:,:,0]==pixel_table)*1 contours_tab,_=return_contours_of_image(image_box_tabels_1) contours_tab=filter_contours_area_of_image_tables(image_box_tabels_1,contours_tab,_,1,0.003) image_box_tabels_1=(image_box[:,:,0]==pixel_line)*1 image_box_tabels_and_m_text=( (image_box[:,:,0]==pixel_table) | (image_box[:,:,0]==1) )*1 image_box_tabels_and_m_text=image_box_tabels_and_m_text.astype(np.uint8) 
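# --- Illustrative sketch (illustration only; not called anywhere in this module) ---
# In check_iou_of_bounding_box_and_contour_for_tables() above, the quantity
# named `iou` is really the fill ratio of a contour inside its bounding box
# (the "extent"), not an intersection-over-union; candidates below 80% are
# split at gaps of the column projection. The measured quantity:
#
#     import cv2
#
#     def bbox_extent_percent(contour):
#         x, y, w, h = cv2.boundingRect(contour)
#         return 100.0 * cv2.contourArea(contour) / float(w * h)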
                    image_box_tabels_1 = image_box_tabels_1.astype(np.uint8)
                    image_box_tabels_1 = cv2.dilate(image_box_tabels_1, KERNEL, iterations=5)

                    contours_table_m_text, _ = return_contours_of_image(image_box_tabels_and_m_text)
                    image_box_tabels = np.repeat(image_box_tabels_1[:, :, np.newaxis], 3, axis=2)
                    image_box_tabels = image_box_tabels.astype(np.uint8)
                    imgray = cv2.cvtColor(image_box_tabels, cv2.COLOR_BGR2GRAY)
                    ret, thresh = cv2.threshold(imgray, 0, 255, 0)
                    contours_line, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

                    y_min_main_line, y_max_main_line = find_features_of_contours(contours_line)
                    y_min_main_tab, y_max_main_tab = find_features_of_contours(contours_tab)

                    cx_tab_m_text, cy_tab_m_text, x_min_tab_m_text, x_max_tab_m_text, \
                        y_min_tab_m_text, y_max_tab_m_text, _ = find_new_features_of_contours(contours_table_m_text)
                    cx_tabl, cy_tabl, x_min_tabl, x_max_tabl, y_min_tabl, y_max_tabl, _ = \
                        find_new_features_of_contours(contours_tab)

                    if len(y_min_main_tab) > 0:
                        y_down_tabs = []
                        y_up_tabs = []
                        for i_t in range(len(y_min_main_tab)):
                            y_down_tab = []
                            y_up_tab = []
                            for i_l in range(len(y_min_main_line)):
                                if (y_min_main_tab[i_t] > y_min_main_line[i_l] and
                                        y_max_main_tab[i_t] > y_min_main_line[i_l] and
                                        y_min_main_tab[i_t] > y_max_main_line[i_l] and
                                        y_max_main_tab[i_t] > y_min_main_line[i_l]):
                                    # separator line lies entirely above the table
                                    pass
                                elif (y_min_main_tab[i_t] < y_max_main_line[i_l] and
                                        y_max_main_tab[i_t] < y_max_main_line[i_l] and
                                        y_max_main_tab[i_t] < y_min_main_line[i_l] and
                                        y_min_main_tab[i_t] < y_min_main_line[i_l]):
                                    # separator line lies entirely below the table
                                    pass
                                elif np.abs(y_max_main_line[i_l] - y_min_main_line[i_l]) < 100:
                                    # line too short to delimit a table
                                    pass
                                else:
                                    # vertical overlap: extend the table span over the line
                                    y_up_tab.append(np.min([y_min_main_line[i_l], y_min_main_tab[i_t]]))
                                    y_down_tab.append(np.max([y_max_main_line[i_l], y_max_main_tab[i_t]]))

                            if len(y_up_tab) == 0:
                                y_up_tabs.append(y_min_main_tab[i_t])
                                y_down_tabs.append(y_max_main_tab[i_t])
                            else:
                                y_up_tabs.append(np.min(y_up_tab))
                                y_down_tabs.append(np.max(y_down_tab))
                    else:
                        y_down_tabs = []
                        y_up_tabs = []

                    for ii in range(len(y_up_tabs)):
                        image_box[y_up_tabs[ii]:y_down_tabs[ii], :, 0] = pixel_table

                    image_revised_last[box_ys, box_xs] = image_box
                except Exception as why:
                    self.logger.error(why)
                    image_revised_last[box_ys, box_xs] = image_box
        else:
            image_revised_last = np.zeros((image_regions_eraly_p.shape[0], image_regions_eraly_p.shape[1], 3))
            for i in range(len(boxes)):
                box_ys = slice(*boxes[i][2:4])
                box_xs = slice(*boxes[i][0:2])
                image_revised_last[box_ys, box_xs] = img_comm[box_ys, box_xs]

        if num_col_classifier == 1:
            img_tables_col_1 = (image_revised_last[:, :, 0] == pixel_table) * 1
            img_tables_col_1 = img_tables_col_1.astype(np.uint8)
            contours_table_col1, _ = return_contours_of_image(img_tables_col_1)
            _, _, _, _, y_min_tab_col1, y_max_tab_col1, _ = find_new_features_of_contours(contours_table_col1)
            if len(y_min_tab_col1) > 0:
                for ijv in range(len(y_min_tab_col1)):
                    image_revised_last[int(y_min_tab_col1[ijv]):int(y_max_tab_col1[ijv]), :, :] = pixel_table
        return image_revised_last

    def do_order_of_regions(self, *args, **kwargs):
        if self.full_layout:
            return self.do_order_of_regions_full_layout(*args, **kwargs)
        return self.do_order_of_regions_no_full_layout(*args, **kwargs)

    def get_tables_from_model(self, img, num_col_classifier):
        img_org = np.copy(img)
        img_height_h = img_org.shape[0]
        img_width_h = img_org.shape[1]
        patches = False

        if self.light_version:
            prediction_table, _ = self.do_prediction_new_concept(patches, img, self.model_table)
            prediction_table = prediction_table.astype(np.int16)
            return prediction_table[:, :, 0]
        else:
            if num_col_classifier < 4 and num_col_classifier > 2:
                prediction_table = self.do_prediction(patches, img, self.model_table)
                pre_updown = self.do_prediction(patches, cv2.flip(img[:, :, :], -1), self.model_table)
                pre_updown = cv2.flip(pre_updown, -1)

                prediction_table[:, :, 0][pre_updown[:, :, 0] == 1] = 1
                prediction_table = prediction_table.astype(np.int16)
            elif num_col_classifier == 2:
                height_ext = 0  # img.shape[0] // 4
                h_start = height_ext // 2
                width_ext = img.shape[1] // 8
                w_start = width_ext // 2

                img_new = np.zeros((img.shape[0] + height_ext, img.shape[1] + width_ext, img.shape[2])).astype(float)
                ys = slice(h_start, h_start + img.shape[0])
                xs = slice(w_start, w_start + img.shape[1])
                img_new[ys, xs] = img

                prediction_ext = self.do_prediction(patches, img_new, self.model_table)
                pre_updown = self.do_prediction(patches, cv2.flip(img_new[:, :, :], -1), self.model_table)
                pre_updown = cv2.flip(pre_updown, -1)

                prediction_table = prediction_ext[ys, xs]
                prediction_table_updown = pre_updown[ys, xs]

                prediction_table[:, :, 0][prediction_table_updown[:, :, 0] == 1] = 1
                prediction_table = prediction_table.astype(np.int16)
            elif num_col_classifier == 1:
                height_ext = 0  # img.shape[0] // 4
                h_start = height_ext // 2
                width_ext = img.shape[1] // 4
                w_start = width_ext // 2

                img_new = np.zeros((img.shape[0] + height_ext, img.shape[1] + width_ext, img.shape[2])).astype(float)
                ys = slice(h_start, h_start + img.shape[0])
                xs = slice(w_start, w_start + img.shape[1])
                img_new[ys, xs] = img

                prediction_ext = self.do_prediction(patches, img_new, self.model_table)
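# --- Illustrative sketch (illustration only; not called anywhere in this module) ---
# get_tables_from_model() uses flip test-time augmentation: the table model is
# applied to the page and to its 180°-flipped copy, the flipped prediction is
# flipped back, and the two table masks are OR-ed. The pattern (`predict`
# stands for any callable returning a label map):
#
#     import cv2
#     import numpy as np
#
#     def predict_with_flip_tta(predict, img, label=1):
#         up = predict(img)
#         down = cv2.flip(predict(cv2.flip(img, -1)), -1)  # flip, predict, flip back
#         merged = np.copy(up)
#         merged[down == label] = label                    # union of detections
#         return merged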
pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_table) pre_updown = cv2.flip(pre_updown, -1) prediction_table = prediction_ext[ys, xs] prediction_table_updown = pre_updown[ys, xs] prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 prediction_table = prediction_table.astype(np.int16) else: prediction_table = np.zeros(img.shape) img_w_half = img.shape[1] // 2 pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], self.model_table) pre2 = self.do_prediction(patches, img[:,img_w_half:,:], self.model_table) pre_full = self.do_prediction(patches, img[:,:,:], self.model_table) pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_table) pre_updown = cv2.flip(pre_updown, -1) prediction_table_full_erode = cv2.erode(pre_full[:,:,0], KERNEL, iterations=4) prediction_table_full_erode = cv2.dilate(prediction_table_full_erode, KERNEL, iterations=4) prediction_table_full_updown_erode = cv2.erode(pre_updown[:,:,0], KERNEL, iterations=4) prediction_table_full_updown_erode = cv2.dilate(prediction_table_full_updown_erode, KERNEL, iterations=4) prediction_table[:,0:img_w_half,:] = pre1[:,:,:] prediction_table[:,img_w_half:,:] = pre2[:,:,:] prediction_table[:,:,0][prediction_table_full_erode[:,:]==1]=1 prediction_table[:,:,0][prediction_table_full_updown_erode[:,:]==1]=1 prediction_table = prediction_table.astype(np.int16) #prediction_table_erode = cv2.erode(prediction_table[:,:,0], self.kernel, iterations=6) #prediction_table_erode = cv2.dilate(prediction_table_erode, self.kernel, iterations=6) prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20) prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20) return prediction_table_erode.astype(np.int16) def run_graphics_and_columns_light( self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light): #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') #print(erosion_hurts, 'erosion_hurts') t_in_gr = time.time() img_g = self.imread(grayscale=True, uint8=True) img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) img_g3 = img_g3.astype(np.uint8) img_g3[:, :, 0] = img_g[:, :] img_g3[:, :, 1] = img_g[:, :] img_g3[:, :, 2] = img_g[:, :] image_page, page_coord, cont_page = self.extract_page() #print("inside graphics 1 ", time.time() - t_in_gr) if self.tables: table_prediction = self.get_tables_from_model(image_page, num_col_classifier) else: table_prediction = np.zeros((image_page.shape[0], image_page.shape[1])).astype(np.int16) if self.plotter: self.plotter.save_page_image(image_page) text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] mask_images = (text_regions_p_1[:, :] == 2) * 1 mask_images = mask_images.astype(np.uint8) mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10) mask_lines = (text_regions_p_1[:, :] == 3) * 1 mask_lines = mask_lines.astype(np.uint8) img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) #print("inside graphics 2 ", time.time() - t_in_gr) if erosion_hurts: img_only_regions = np.copy(img_only_regions_with_sep[:,:]) else: img_only_regions = 
cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6) ##print(img_only_regions.shape,'img_only_regions') ##plt.imshow(img_only_regions[:,:]) ##plt.show() ##num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) try: num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) num_col = num_col + 1 if not num_column_is_classified: num_col_classifier = num_col + 1 except Exception as why: self.logger.error(why) num_col = None #print("inside graphics 3 ", time.time() - t_in_gr) return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light) def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, img_bin_light): #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') #print(erosion_hurts, 'erosion_hurts') t_in_gr = time.time() img_g = self.imread(grayscale=True, uint8=True) img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) img_g3 = img_g3.astype(np.uint8) img_g3[:, :, 0] = img_g[:, :] img_g3[:, :, 1] = img_g[:, :] img_g3[:, :, 2] = img_g[:, :] image_page, page_coord, cont_page = self.extract_page() #print("inside graphics 1 ", time.time() - t_in_gr) textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] return page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page def run_graphics_and_columns( self, text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts): t_in_gr = time.time() img_g = self.imread(grayscale=True, uint8=True) img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) img_g3 = img_g3.astype(np.uint8) img_g3[:, :, 0] = img_g[:, :] img_g3[:, :, 1] = img_g[:, :] img_g3[:, :, 2] = img_g[:, :] image_page, page_coord, cont_page = self.extract_page() if self.tables: table_prediction = self.get_tables_from_model(image_page, num_col_classifier) else: table_prediction = np.zeros((image_page.shape[0], image_page.shape[1])).astype(np.int16) if self.plotter: self.plotter.save_page_image(image_page) text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] mask_images = (text_regions_p_1[:, :] == 2) * 1 mask_images = mask_images.astype(np.uint8) mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10) mask_lines = (text_regions_p_1[:, :] == 3) * 1 mask_lines = mask_lines.astype(np.uint8) img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) if erosion_hurts: img_only_regions = np.copy(img_only_regions_with_sep[:,:]) else: img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6) try: num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) num_col = num_col + 1 if not num_column_is_classified: num_col_classifier = num_col + 1 except Exception as why: self.logger.error(why) num_col = None return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction) def run_enhancement(self, light_version): t_in = time.time() self.logger.info("Resizing and enhancing image...") is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = \ 
self.resize_and_enhance_image_with_column_classifier(light_version) self.logger.info("Image was %senhanced.", '' if is_image_enhanced else 'not ') scale = 1 if is_image_enhanced: if self.allow_enhancement: #img_res = img_res.astype(np.uint8) self.get_image_and_scales(img_org, img_res, scale) if self.plotter: self.plotter.save_enhanced_image(img_res) else: self.get_image_and_scales_after_enhancing(img_org, img_res) else: if self.allow_enhancement: self.get_image_and_scales(img_org, img_res, scale) else: self.get_image_and_scales(img_org, img_res, scale) if self.allow_scaling: img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, img_bin) self.get_image_and_scales_after_enhancing(img_org, img_res) #print("enhancement in ", time.time()-t_in) return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified def run_textline(self, image_page, num_col_classifier=None): scaler_h_textline = 1#1.3 # 1.2#1.2 scaler_w_textline = 1#1.3 # 0.9#1 #print(image_page.shape) textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) if self.plotter: self.plotter.save_plot_of_textlines(textline_mask_tot_ea, image_page) return textline_mask_tot_ea def run_deskew(self, textline_mask_tot_ea): #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew') slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), 2, 30, True, map=self.executor.map, logger=self.logger, plotter=self.plotter) slope_first = 0 if self.plotter: self.plotter.save_deskewed_image(slope_deskew) self.logger.info("slope_deskew: %.2f°", slope_deskew) return slope_deskew, slope_first def run_marginals( self, image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction): image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :] textline_mask_tot[mask_images[:, :] == 1] = 0 text_regions_p_1[mask_lines[:, :] == 1] = 3 text_regions_p = text_regions_p_1[:, :] text_regions_p = np.array(text_regions_p) if num_col_classifier in (1, 2): try: regions_without_separators = (text_regions_p[:, :] == 1) * 1 if self.tables: regions_without_separators[table_prediction==1] = 1 regions_without_separators = regions_without_separators.astype(np.uint8) text_regions_p = get_marginals( rotate_image(regions_without_separators, slope_deskew), text_regions_p, num_col_classifier, slope_deskew, light_version=self.light_version, kernel=KERNEL) except Exception as e: self.logger.error("exception %s", e) if self.plotter: self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page) self.plotter.save_plot_of_layout_main(text_regions_p, image_page) return textline_mask_tot, text_regions_p, image_page_rotated def run_boxes_no_full_layout( self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts): self.logger.debug('enter run_boxes_no_full_layout') t_0_box = time.time() if np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func( image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], 
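# --- Illustrative sketch (illustration only; not called anywhere in this module) ---
# rotation_not_90_func() above rotates the page and its masks by the deskew
# angle (growing the canvas), and the results are immediately resized back to
# the original layout shape so every mask stays pixel-aligned with
# text_regions_p. The same roundtrip with generic tools (an analogy only, not
# the rotation_not_90_func implementation):
#
#     import cv2
#     from scipy.ndimage import rotate
#
#     def rotate_then_resize_back(mask, angle_deg):
#         rotated = rotate(mask, angle_deg, reshape=True, order=0)  # canvas grows
#         return cv2.resize(rotated, (mask.shape[1], mask.shape[0]),
#                           interpolation=cv2.INTER_NEAREST)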
text_regions_p.shape[1]) table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], text_regions_p.shape[1]) regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1 if self.tables: regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 regions_without_separators = (text_regions_p[:, :] == 1) * 1 # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) #print(time.time()-t_0_box,'time box in 1') if self.tables: regions_without_separators[table_prediction ==1 ] = 1 if np.abs(slope_deskew) < SLOPE_THRESHOLD: text_regions_p_1_n = None textline_mask_tot_d = None regions_without_separators_d = None pixel_lines = 3 if np.abs(slope_deskew) < SLOPE_THRESHOLD: _, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) if np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) #print(time.time()-t_0_box,'time box in 2') self.logger.info("num_col_classifier: %s", num_col_classifier) if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: regions_without_separators = regions_without_separators.astype(np.uint8) regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) else: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) #print(time.time()-t_0_box,'time box in 3') t1 = time.time() if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) boxes_d = None self.logger.debug("len(boxes): %s", len(boxes)) #print(time.time()-t_0_box,'time box in 3.1') if self.tables: if self.light_version: pass else: text_regions_p_tables = np.copy(text_regions_p) text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 pixel_line = 3 img_revised_tab2 = self.add_tables_heuristic_to_layout( text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables, num_col_classifier , 0.000005, pixel_line) #print(time.time()-t_0_box,'time box in 3.2') img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables( img_revised_tab2, table_prediction, 10, num_col_classifier) #print(time.time()-t_0_box,'time box in 3.3') else: boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new( splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) boxes = None self.logger.debug("len(boxes): %s", len(boxes_d)) if self.tables: if self.light_version: pass else: text_regions_p_tables = np.copy(text_regions_p_1_n) text_regions_p_tables =np.round(text_regions_p_tables) text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 pixel_line = 3 img_revised_tab2 = self.add_tables_heuristic_to_layout( text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) img_revised_tab2_d,_ = 
self.check_iou_of_bounding_box_and_contour_for_tables( img_revised_tab2, table_prediction_n, 10, num_col_classifier) img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) #print(time.time()-t_0_box,'time box in 4') self.logger.info("detecting boxes took %.1fs", time.time() - t1) if self.tables: if self.light_version: text_regions_p[:,:][table_prediction[:,:]==1] = 10 img_revised_tab=text_regions_p[:,:] else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: img_revised_tab = np.copy(img_revised_tab2[:,:,0]) img_revised_tab[:,:][(text_regions_p[:,:] == 1) & (img_revised_tab[:,:] != 10)] = 1 else: img_revised_tab = np.copy(text_regions_p[:,:]) img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0 img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10 text_regions_p[:,:][text_regions_p[:,:]==10] = 0 text_regions_p[:,:][img_revised_tab[:,:]==10] = 10 else: img_revised_tab=text_regions_p[:,:] #img_revised_tab = text_regions_p[:, :] if self.light_version: polygons_of_images = return_contours_of_interested_region(text_regions_p, 2) else: polygons_of_images = return_contours_of_interested_region(img_revised_tab, 2) pixel_img = 4 min_area_mar = 0.00001 if self.light_version: marginal_mask = (text_regions_p[:,:]==pixel_img)*1 marginal_mask = marginal_mask.astype('uint8') marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2) polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar) else: polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) #print(time.time()-t_0_box,'time box in 5') self.logger.debug('exit run_boxes_no_full_layout') return (polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables) def run_boxes_full_layout( self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light): self.logger.debug('enter run_boxes_full_layout') t_full0 = time.time() if self.tables: if self.light_version: text_regions_p[:,:][table_prediction[:,:]==1] = 10 img_revised_tab = text_regions_p[:,:] if np.abs(slope_deskew) >= SLOPE_THRESHOLD: image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 else: text_regions_p_1_n = None textline_mask_tot_d = None regions_without_separators_d = None # regions_without_separators = ( text_regions_p[:,:]==1 | text_regions_p[:,:]==2 )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) regions_without_separators = 
(text_regions_p[:,:] == 1)*1 regions_without_separators[table_prediction == 1] = 1 else: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 else: text_regions_p_1_n = None textline_mask_tot_d = None regions_without_separators_d = None # regions_without_separators = ( text_regions_p[:,:]==1 | text_regions_p[:,:]==2 )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) regions_without_separators = (text_regions_p[:,:] == 1)*1 regions_without_separators[table_prediction == 1] = 1 pixel_lines=3 if np.abs(slope_deskew) < SLOPE_THRESHOLD: num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) if np.abs(slope_deskew) >= SLOPE_THRESHOLD: num_col_d, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) if num_col_classifier>=3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: regions_without_separators = regions_without_separators.astype(np.uint8) regions_without_separators = cv2.erode(regions_without_separators[:,:], KERNEL, iterations=6) if np.abs(slope_deskew) >= SLOPE_THRESHOLD: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) regions_without_separators_d = cv2.erode(regions_without_separators_d[:,:], KERNEL, iterations=6) else: pass if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) text_regions_p_tables = np.copy(text_regions_p) text_regions_p_tables[:,:][(table_prediction[:,:]==1)] = 10 pixel_line = 3 img_revised_tab2 = self.add_tables_heuristic_to_layout( text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables, num_col_classifier , 0.000005, pixel_line) img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables( img_revised_tab2, table_prediction, 10, num_col_classifier) else: boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new( splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) text_regions_p_tables = np.copy(text_regions_p_1_n) text_regions_p_tables = np.round(text_regions_p_tables) text_regions_p_tables[:,:][(text_regions_p_tables[:,:]!=3) & (table_prediction_n[:,:]==1)] = 10 pixel_line = 3 img_revised_tab2 = self.add_tables_heuristic_to_layout( text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables( img_revised_tab2, 
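# --- Illustrative sketch (illustration only; not called anywhere in this module) ---
# The table stamping above writes label 10 into the layout wherever the table
# model fired, but refuses to overwrite separator pixels (label 3). Boolean
# masks make that a one-liner:
#
#     import numpy as np
#
#     layout = np.array([[1, 3], [0, 1]])
#     tables = np.array([[1, 1], [0, 1]])
#     out = layout.copy()
#     out[(out != 3) & (tables == 1)] = 10  # stamp tables, keep separators
#     assert out.tolist() == [[10, 3], [0, 10]]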
                    table_prediction_n, 10, num_col_classifier)
                img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew)
                img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated)
                img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8)
                img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated,
                                                          text_regions_p.shape[0],
                                                          text_regions_p.shape[1])

            if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                img_revised_tab = np.copy(img_revised_tab2[:, :, 0])
            else:
                img_revised_tab = np.copy(text_regions_p[:, :])
                img_revised_tab[:, :][img_revised_tab[:, :] == 10] = 0
                img_revised_tab[:, :][img_revised_tab2_d_rotated[:, :, 0] == 10] = 10
            ##img_revised_tab = img_revised_tab2[:, :, 0]
            #img_revised_tab = text_regions_p[:, :]
            text_regions_p[:, :][text_regions_p[:, :] == 10] = 0
            text_regions_p[:, :][img_revised_tab[:, :] == 10] = 10
            #img_revised_tab[img_revised_tab2[:, :, 0] == 10] = 10

        pixel_img = 4
        min_area_mar = 0.00001
        if self.light_version:
            marginal_mask = (text_regions_p[:, :] == pixel_img)*1
            marginal_mask = marginal_mask.astype('uint8')
            marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2)
            polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar)
        else:
            polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar)

        pixel_img = 10
        contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar)

        # set first model with second model
        text_regions_p[:, :][text_regions_p[:, :] == 2] = 5
        text_regions_p[:, :][text_regions_p[:, :] == 3] = 6
        text_regions_p[:, :][text_regions_p[:, :] == 4] = 8

        image_page = image_page.astype(np.uint8)
        #print("full inside 1", time.time()- t_full0)
        regions_fully, regions_fully_only_drop = self.extract_text_regions_new(
            img_bin_light if self.light_version else image_page, False, cols=num_col_classifier)
        #print("full inside 2", time.time()- t_full0)

        # 6 is the separators label in the old full-layout model
        # 4 is the drop capital class in the old full-layout model
        # in the new full-layout model, drop capital is 3 and separators are 5
        text_regions_p[:, :][regions_fully[:, :, 0] == 5] = 6
        ###regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4
        #text_regions_p[:,:][regions_fully[:,:,0]==6]=6
        ##regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p)
        ##regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4
        drop_capital_label_in_full_layout_model = 3

        drops = (regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model)*1
        drops = drops.astype(np.uint8)
        regions_fully[:, :, 0][regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model] = 1
        drops = cv2.erode(drops[:, :], KERNEL, iterations=1)
        regions_fully[:, :, 0][drops[:, :] == 1] = drop_capital_label_in_full_layout_model

        regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(
            regions_fully, drop_capital_label_in_full_layout_model, text_regions_p)
        ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier)
        ##if num_col_classifier > 2:
        ##    regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0
        ##else:
        ##    regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p)
        ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions)
        # plt.imshow(regions_fully[:,:,0])
        # plt.show()
        text_regions_p[:, :][regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model] = 4
        ####text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4
        #plt.imshow(text_regions_p)
        #plt.show()
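        # (added orientation note, derived from the assignments above) The
        # second model's classes 2/3/4 in text_regions_p are moved to 5/6/8,
        # which frees label 6 to hold separators (class 5 of the new
        # full-layout model) and label 4 to hold drop capitals (class 3 of the
        # new model); label 10 is reserved for tables throughout.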
####if not self.tables: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout( image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1]) regions_fully_n = resize_image(regions_fully_n, text_regions_p.shape[0], text_regions_p.shape[1]) if not self.tables: regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1 else: text_regions_p_1_n = None textline_mask_tot_d = None regions_without_separators_d = None if not self.tables: regions_without_separators = (text_regions_p[:, :] == 1) * 1 img_revised_tab = np.copy(text_regions_p[:, :]) polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5) self.logger.debug('exit run_boxes_full_layout') #print("full inside 3", time.time()- t_full0) return (polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables) @staticmethod def our_load_model(model_file): if model_file.endswith('.h5') and Path(model_file[:-3]).exists(): # prefer SavedModel over HDF5 format if it exists model_file = model_file[:-3] try: model = load_model(model_file, compile=False) except: model = load_model(model_file, compile=False, custom_objects={ "PatchEncoder": PatchEncoder, "Patches": Patches}) return model def do_order_of_regions_with_model(self, contours_only_text_parent, contours_only_text_parent_h, text_regions_p): y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] img_poly = np.zeros((y_len,x_len), dtype='uint8') img_poly[text_regions_p[:,:]==1] = 1 img_poly[text_regions_p[:,:]==2] = 2 img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 ###temp ##sep_mask = (img_poly==5)*1 ##sep_mask = sep_mask.astype('uint8') ##sep_mask = cv2.erode(sep_mask, kernel=KERNEL, iterations=2) ##img_poly[img_poly==5] = 0 ##img_poly[sep_mask==1] = 5 ### img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') if contours_only_text_parent_h: _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours( contours_only_text_parent_h) for j in range(len(cy_main)): img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12, int(x_min_main[j]):int(x_max_main[j])] = 1 co_text_all = contours_only_text_parent + contours_only_text_parent_h else: co_text_all = contours_only_text_parent if not len(co_text_all): return [], [] labels_con = np.zeros((int(y_len /6.), int(x_len/6.), len(co_text_all)), dtype=bool) co_text_all = [(i/6).astype(int) for i in co_text_all] for i in range(len(co_text_all)): img = labels_con[:,:,i].astype(np.uint8) #img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) cv2.fillPoly(img, pts=[co_text_all[i]], color=(1,)) labels_con[:,:,i] = img height1 =672#448 width1 = 448#224 height2 =672#448 width2= 448#224 height3 =672#448 width3 = 448#224 labels_con = resize_image(labels_con.astype(np.uint8), height1, width1).astype(bool) img_header_and_sep = resize_image(img_header_and_sep, height1, width1) img_poly = resize_image(img_poly, height3, width3) inference_bs = 3 input_1 = np.zeros((inference_bs, height1, width1, 3)) ordered = [list(range(len(co_text_all)))] index_update = 0 #print(labels_con.shape[2],"number of 
regions for reading order")
        while index_update >= 0:
            ij_list = ordered.pop(index_update)
            i = ij_list.pop(0)

            ante_list = []
            post_list = []
            tot_counter = 0
            batch = []
            for j in ij_list:
                img1 = labels_con[:, :, i].astype(float)
                img2 = labels_con[:, :, j].astype(float)
                img1[img_poly == 5] = 2
                img2[img_poly == 5] = 2
                img1[img_header_and_sep == 1] = 3
                img2[img_header_and_sep == 1] = 3

                input_1[len(batch), :, :, 0] = img1 / 3.
                input_1[len(batch), :, :, 2] = img2 / 3.
                input_1[len(batch), :, :, 1] = img_poly / 5.

                tot_counter += 1
                batch.append(j)
                if tot_counter % inference_bs == 0 or tot_counter == len(ij_list):
                    y_pr = self.model_reading_order.predict(input_1, verbose=0)
                    for jb, j in enumerate(batch):
                        if y_pr[jb][0] >= 0.5:
                            post_list.append(j)
                        else:
                            ante_list.append(j)
                    batch = []

            if len(ante_list):
                ordered.insert(index_update, ante_list)
                index_update += 1
            ordered.insert(index_update, [i])
            if len(post_list):
                ordered.insert(index_update + 1, post_list)

            index_update = -1
            for index_next, ij_list in enumerate(ordered):
                if len(ij_list) > 1:
                    index_update = index_next
                    break

        ordered = [i[0] for i in ordered]
        region_ids = ['region_%04d' % i for i in range(len(co_text_all))]
        return ordered, region_ids

    def return_start_and_end_of_common_text_of_textline_ocr(self, textline_image, ind_tot):
        width = np.shape(textline_image)[1]
        height = np.shape(textline_image)[0]
        common_window = int(0.2 * width)

        width1 = int(width / 2. - common_window)
        width2 = int(width / 2. + common_window)

        img_sum = np.sum(textline_image[:, :, 0], axis=0)
        sum_smoothed = gaussian_filter1d(img_sum, 3)

        peaks_real, _ = find_peaks(sum_smoothed, height=0)
        if len(peaks_real) > 70:
            #print(len(peaks_real), 'len(peaks_real)')
            # keep only the peaks inside the common window around the line center
            peaks_real = peaks_real[(peaks_real < width2) & (peaks_real > width1)]

            arg_sort = np.argsort(sum_smoothed[peaks_real])
            arg_sort4 = arg_sort[::-1][:4]
            peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
            argsort_sorted = np.argsort(peaks_sort_4)

            first_4_sorted = peaks_sort_4[argsort_sorted]
            y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
            #print(first_4_sorted,'first_4_sorted')

            arg_sortnew = np.argsort(y_4_sorted)
            peaks_final = np.sort(first_4_sorted[arg_sortnew][2:])

            #plt.figure(ind_tot)
            #plt.imshow(textline_image)
            #plt.plot([peaks_final[0], peaks_final[0]], [0, height-1])
            #plt.plot([peaks_final[1], peaks_final[1]], [0, height-1])
            #plt.savefig('./'+str(ind_tot)+'.png')

            return peaks_final[0], peaks_final[1]
        else:
            pass

    def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image, ind_tot):
        width = np.shape(textline_image)[1]
        height = np.shape(textline_image)[0]
        common_window = int(0.06 * width)

        width1 = int(width / 2. - common_window)
        width2 = int(width / 2. + common_window)

        img_sum = np.sum(textline_image[:, :, 0], axis=0)
        sum_smoothed = gaussian_filter1d(img_sum, 3)

        peaks_real, _ = find_peaks(sum_smoothed, height=0)
        if len(peaks_real) > 70:
            #print(len(peaks_real), 'len(peaks_real)')
            # keep only the peaks inside the common window around the line center
            peaks_real = peaks_real[(peaks_real < width2) & (peaks_real > width1)]

            arg_max = np.argmax(sum_smoothed[peaks_real])
            peaks_final = peaks_real[arg_max]

            #plt.figure(ind_tot)
            #plt.imshow(textline_image)
            #plt.plot([peaks_final, peaks_final], [0, height-1])
            #plt.savefig('./'+str(ind_tot)+'.png')

            return peaks_final
        else:
            return None

    def return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
            self, peaks_real, sum_smoothed, start_split, end_split):

        # keep only the peaks inside the requested split window
        peaks_real = peaks_real[(peaks_real < end_split) & (peaks_real > start_split)]

        arg_sort = np.argsort(sum_smoothed[peaks_real])
        arg_sort4 = arg_sort[::-1][:4]
        peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
        argsort_sorted = np.argsort(peaks_sort_4)

        first_4_sorted = peaks_sort_4[argsort_sorted]
        y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
        #print(first_4_sorted,'first_4_sorted')

        arg_sortnew = np.argsort(y_4_sorted)
        peaks_final = np.sort(first_4_sorted[arg_sortnew][3:])
        return peaks_final[0]

    def return_start_and_end_of_common_text_of_textline_ocr_new(self, textline_image, ind_tot):
        width = np.shape(textline_image)[1]
        height = np.shape(textline_image)[0]
        common_window = int(0.15 * width)

        width1 = int(width / 2. - common_window)
        width2 = int(width / 2. + common_window)
        mid = int(width / 2.)

        img_sum = np.sum(textline_image[:, :, 0], axis=0)
        sum_smoothed = gaussian_filter1d(img_sum, 3)

        peaks_real, _ = find_peaks(sum_smoothed, height=0)
        if len(peaks_real) > 70:
            peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
                peaks_real, sum_smoothed, width1, mid + 2)
            peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
                peaks_real, sum_smoothed, mid - 2, width2)

            #plt.figure(ind_tot)
            #plt.imshow(textline_image)
            #plt.plot([peak_start, peak_start], [0, height-1])
            #plt.plot([peak_end, peak_end], [0, height-1])
            #plt.savefig('./'+str(ind_tot)+'.png')

            return peak_start, peak_end
        else:
            pass

    def return_ocr_of_textline_without_common_section(
            self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio, ind_tot):

        if h2w_ratio > 0.05:
            pixel_values = processor(textline_image, return_tensors="pt").pixel_values
            generated_ids = model_ocr.generate(pixel_values.to(device))
            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        else:
            #width = np.shape(textline_image)[1]
            #height = np.shape(textline_image)[0]
            #common_window = int(0.3*width)
            #width1 = int ( width/2. - common_window )
            #width2 = int ( width/2.
+ common_window ) split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section( textline_image, ind_tot) if split_point: image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) #pixel_values1 = processor(image1, return_tensors="pt").pixel_values #pixel_values2 = processor(image2, return_tensors="pt").pixel_values pixel_values_merged = processor([image1,image2], return_tensors="pt").pixel_values generated_ids_merged = model_ocr.generate(pixel_values_merged.to(device)) generated_text_merged = processor.batch_decode(generated_ids_merged, skip_special_tokens=True) #print(generated_text_merged,'generated_text_merged') #generated_ids1 = model_ocr.generate(pixel_values1.to(device)) #generated_ids2 = model_ocr.generate(pixel_values2.to(device)) #generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] #generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] #generated_text = generated_text1 + ' ' + generated_text2 generated_text = generated_text_merged[0] + ' ' + generated_text_merged[1] #print(generated_text1,'generated_text1') #print(generated_text2, 'generated_text2') #print('########################################') else: pixel_values = processor(textline_image, return_tensors="pt").pixel_values generated_ids = model_ocr.generate(pixel_values.to(device)) generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] #print(generated_text,'generated_text') #print('########################################') return generated_text def return_ocr_of_textline( self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): if h2w_ratio > 0.05: pixel_values = processor(textline_image, return_tensors="pt").pixel_values generated_ids = model_ocr.generate(pixel_values.to(device)) generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] else: #width = np.shape(textline_image)[1] #height = np.shape(textline_image)[0] #common_window = int(0.3*width) #width1 = int ( width/2. - common_window ) #width2 = int ( width/2. 
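            # (added explanatory comment, not in the original source) Below, the
            # two halves of a flat textline are OCRed separately and merged by
            # overlap: difflib.SequenceMatcher (imported at module level as `sq`)
            # finds the longest common substring and only the tail of the second
            # result after that match is appended, roughly:
            #   match = sq(None, text1, text2).find_longest_match(
            #       0, len(text1), 0, len(text2))
            #   merged = text1 + text2[match.b + match.size:]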
+ common_window ) try: width1, width2 = self.return_start_and_end_of_common_text_of_textline_ocr_new(textline_image, ind_tot) image1 = textline_image[:, :width2,:]# image.crop((0, 0, width2, height)) image2 = textline_image[:, width1:,:]#image.crop((width1, 0, width, height)) pixel_values1 = processor(image1, return_tensors="pt").pixel_values pixel_values2 = processor(image2, return_tensors="pt").pixel_values generated_ids1 = model_ocr.generate(pixel_values1.to(device)) generated_ids2 = model_ocr.generate(pixel_values2.to(device)) generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] #print(generated_text1,'generated_text1') #print(generated_text2, 'generated_text2') #print('########################################') match = sq(None, generated_text1, generated_text2).find_longest_match( 0, len(generated_text1), 0, len(generated_text2)) generated_text = generated_text1 + generated_text2[match.b+match.size:] except: pixel_values = processor(textline_image, return_tensors="pt").pixel_values generated_ids = model_ocr.generate(pixel_values.to(device)) generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] return generated_text def return_textline_contour_with_added_box_coordinate(self, textline_contour, box_ind): textline_contour[:,0] = textline_contour[:,0] + box_ind[2] textline_contour[:,1] = textline_contour[:,1] + box_ind[0] return textline_contour def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] def return_it_in_two_groups(self, x_differential): split = [ind if x_differential[ind]!=x_differential[ind+1] else -1 for ind in range(len(x_differential)-1)] split_masked = list( np.array(split[:])[np.array(split[:])!=-1] ) if 0 not in split_masked: split_masked.insert(0, -1) split_masked.append(len(x_differential)-1) split_masked = np.array(split_masked) +1 sums = [np.sum(x_differential[split_masked[ind]:split_masked[ind+1]]) for ind in range(len(split_masked)-1)] indexes_to_bec_changed = [ind if (np.abs(sums[ind-1]) > np.abs(sums[ind]) and np.abs(sums[ind+1]) > np.abs(sums[ind])) else -1 for ind in range(1,len(sums)-1)] indexes_to_bec_changed_filtered = np.array(indexes_to_bec_changed)[np.array(indexes_to_bec_changed)!=-1] x_differential_new = np.copy(x_differential) for i in indexes_to_bec_changed_filtered: i_slice = slice(split_masked[i], split_masked[i+1]) x_differential_new[i_slice] = -1 * np.array(x_differential)[i_slice] return x_differential_new def dilate_textregions_contours_textline_version(self, all_found_textline_polygons): #print(all_found_textline_polygons) for j in range(len(all_found_textline_polygons)): for ij in range(len(all_found_textline_polygons[j])): con_ind = all_found_textline_polygons[j][ij] area = cv2.contourArea(con_ind) con_ind = con_ind.astype(float) x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) x_differential = gaussian_filter1d(x_differential, 0.1) y_differential = gaussian_filter1d(y_differential, 0.1) x_min = float(np.min( con_ind[:,0,0] )) y_min = float(np.min( con_ind[:,0,1] )) x_max = float(np.max( con_ind[:,0,0] )) y_max = float(np.max( con_ind[:,0,1] )) x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] abs_diff=abs(abs(x_differential)- 
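                # (added explanatory comment, not in the original source)
                # abs_diff measures how diagonal each polygon step is: where it
                # is ~0 (|dx| == |dy|) the vertex is pushed by the smaller
                # dilation_m2 in both axes, otherwise by dilation_m1 along the
                # perpendicular of the dominant direction; the offset is the
                # step's sign vector rotated 90deg, (dx, dy) -> (-sign(dy), sign(dx)).
                # For example a step going right, (dx, dy) = (+1, 0), yields
                # (inc_x, inc_y) = (0, +m), i.e. a push normal to the contour.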
abs(y_differential) ) inc_x = np.zeros(len(x_differential)+1) inc_y = np.zeros(len(x_differential)+1) if (y_max-y_min) <= (x_max-x_min): dilation_m1 = round(area / (x_max-x_min) * 0.12) else: dilation_m1 = round(area / (y_max-y_min) * 0.12) if dilation_m1>8: dilation_m1 = 8 if dilation_m1<6: dilation_m1 = 6 #print(dilation_m1, 'dilation_m1') dilation_m1 = 6 dilation_m2 = int(dilation_m1/2.) +1 for i in range(len(x_differential)): if abs_diff[i]==0: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and abs_diff[i]>=3: if abs(x_differential[i])>abs(y_differential[i]): inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) else: inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) else: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] con_scaled = con_ind*1 con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) con_ind = con_ind.astype(np.int32) results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] results = np.array(results) #print(results,'results') results[results==0] = 1 diff_result = np.diff(results) indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] if results[0]==1: con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] #indices_2 = indices_2[1:] indices_m2 = indices_m2[1:] if len(indices_2)>len(indices_m2): con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] indices_2 = indices_2[:-1] for ii in range(len(indices_2)): con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons def dilate_textregions_contours(self, all_found_textline_polygons): #print(all_found_textline_polygons) for j in range(len(all_found_textline_polygons)): con_ind = all_found_textline_polygons[j] #print(len(con_ind[:,0,0]),'con_ind[:,0,0]') area = cv2.contourArea(con_ind) con_ind = con_ind.astype(float) x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) x_differential = gaussian_filter1d(x_differential, 0.1) y_differential = gaussian_filter1d(y_differential, 0.1) x_min = float(np.min( con_ind[:,0,0] )) y_min = float(np.min( con_ind[:,0,1] )) x_max = float(np.max( con_ind[:,0,0] )) y_max = float(np.max( con_ind[:,0,1] )) x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] y_differential_mask_nonzeros = [ 
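            # (added explanatory comment, not in the original source) Note that
            # further down dilation_m1 is first derived from the contour's
            # area-to-extent ratio and clamped to [6, 8], but is then overridden
            # with the constant 6, so the computed value is currently unused.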
ind/abs(ind) if ind!=0 else ind for ind in y_differential] abs_diff=abs(abs(x_differential)- abs(y_differential) ) inc_x = np.zeros(len(x_differential)+1) inc_y = np.zeros(len(x_differential)+1) if (y_max-y_min) <= (x_max-x_min): dilation_m1 = round(area / (x_max-x_min) * 0.12) else: dilation_m1 = round(area / (y_max-y_min) * 0.12) if dilation_m1>8: dilation_m1 = 8 if dilation_m1<6: dilation_m1 = 6 #print(dilation_m1, 'dilation_m1') dilation_m1 = 6 dilation_m2 = int(dilation_m1/2.) +1 for i in range(len(x_differential)): if abs_diff[i]==0: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and abs_diff[i]>=3: if abs(x_differential[i])>abs(y_differential[i]): inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) else: inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) else: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] con_scaled = con_ind*1 con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) con_ind = con_ind.astype(np.int32) results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] results = np.array(results) #print(results,'results') results[results==0] = 1 diff_result = np.diff(results) indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] if results[0]==1: con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] #indices_2 = indices_2[1:] indices_m2 = indices_m2[1:] if len(indices_2)>len(indices_m2): con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] indices_2 = indices_2[:-1] for ii in range(len(indices_2)): con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons def dilate_textline_contours(self, all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): for ij in range(len(all_found_textline_polygons[j])): con_ind = all_found_textline_polygons[j][ij] area = cv2.contourArea(con_ind) con_ind = con_ind.astype(float) x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) x_differential = gaussian_filter1d(x_differential, 3) y_differential = gaussian_filter1d(y_differential, 3) x_min = float(np.min( con_ind[:,0,0] )) y_min = float(np.min( con_ind[:,0,1] )) x_max = float(np.max( con_ind[:,0,0] )) y_max = float(np.max( con_ind[:,0,1] )) x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in 
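                # (added explanatory comment, not in the original source) The
                # textline variant smooths the point differentials much more
                # aggressively than the region variants above (Gaussian sigma 3
                # instead of 0.1) and uses a larger dilation scale (0.35, clamped
                # to [4, 12]), presumably because single lines are thinner and
                # their contours noisier.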
x_differential] y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] abs_diff=abs(abs(x_differential)- abs(y_differential) ) inc_x = np.zeros(len(x_differential)+1) inc_y = np.zeros(len(x_differential)+1) if (y_max-y_min) <= (x_max-x_min): dilation_m1 = round(area / (x_max-x_min) * 0.35) else: dilation_m1 = round(area / (y_max-y_min) * 0.35) if dilation_m1>12: dilation_m1 = 12 if dilation_m1<4: dilation_m1 = 4 #print(dilation_m1, 'dilation_m1') dilation_m2 = int(dilation_m1/2.) +1 for i in range(len(x_differential)): if abs_diff[i]==0: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and abs_diff[i]>=3: if abs(x_differential[i])>abs(y_differential[i]): inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) else: inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) else: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] con_scaled = con_ind*1 con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 con_ind = con_ind.astype(np.int32) results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] results = np.array(results) results[results==0] = 1 diff_result = np.diff(results) indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] if results[0]==1: con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] indices_m2 = indices_m2[1:] if len(indices_2)>len(indices_m2): con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] indices_2 = indices_2[:-1] for ii in range(len(indices_2)): con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons def filter_contours_inside_a_bigger_one(self,contours, contours_d_ordered, image, marginal_cnts=None, type_contour="textregion"): if type_contour=="textregion": areas = [cv2.contourArea(contours[j]) for j in range(len(contours))] area_tot = image.shape[0]*image.shape[1] M_main = [cv2.moments(contours[j]) for j in range(len(contours))] cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] areas_ratio = np.array(areas)/ area_tot contours_index_small = [ind for ind in range(len(contours)) if areas_ratio[ind] < 1e-3] contours_index_big = [ind for ind in range(len(contours)) if areas_ratio[ind] >= 1e-3] #contours_> = [contours[ind] for ind in 
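            # (added explanatory comment, not in the original source) In the loop
            # below, a "small" region (area ratio < 1e-3 of the page) is removed
            # when its centroid lies inside one of the big regions or, if
            # marginal contours were passed in, inside a marginal;
            # cv2.pointPolygonTest returns +1 for a point inside a contour.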
contours_index_big] indexes_to_be_removed = [] for ind_small in contours_index_small: results = [cv2.pointPolygonTest(contours[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in contours_index_big] if marginal_cnts: results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in range(len(marginal_cnts))] results_marginal = np.array(results_marginal) if np.any(results_marginal==1): indexes_to_be_removed.append(ind_small) results = np.array(results) if np.any(results==1): indexes_to_be_removed.append(ind_small) if len(indexes_to_be_removed)>0: indexes_to_be_removed = np.unique(indexes_to_be_removed) indexes_to_be_removed = np.sort(indexes_to_be_removed)[::-1] for ind in indexes_to_be_removed: contours.pop(ind) if len(contours_d_ordered)>0: contours_d_ordered.pop(ind) return contours, contours_d_ordered else: contours_txtline_of_all_textregions = [] indexes_of_textline_tot = [] index_textline_inside_textregion = [] for jj in range(len(contours)): contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours[jj] ind_textline_inside_tr = list(range(len(contours[jj]))) index_textline_inside_textregion = index_textline_inside_textregion + ind_textline_inside_tr ind_ins = [jj] * len(contours[jj]) indexes_of_textline_tot = indexes_of_textline_tot + ind_ins M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))] cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] areas_tot = [cv2.contourArea(con_ind) for con_ind in contours_txtline_of_all_textregions] area_tot_tot = image.shape[0]*image.shape[1] textregion_index_to_del = [] textline_in_textregion_index_to_del = [] for ij in range(len(contours_txtline_of_all_textregions)): args_all = list(np.array(range(len(contours_txtline_of_all_textregions)))) args_all.pop(ij) areas_without = np.array(areas_tot)[args_all] area_of_con_interest = areas_tot[ij] args_with_bigger_area = np.array(args_all)[areas_without > 1.5*area_of_con_interest] if len(args_with_bigger_area)>0: results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False) for ind in args_with_bigger_area ] results = np.array(results) if np.any(results==1): #print(indexes_of_textline_tot[ij], index_textline_inside_textregion[ij]) textregion_index_to_del.append(int(indexes_of_textline_tot[ij])) textline_in_textregion_index_to_del.append(int(index_textline_inside_textregion[ij])) #contours[int(indexes_of_textline_tot[ij])].pop(int(index_textline_inside_textregion[ij])) textregion_index_to_del = np.array(textregion_index_to_del) textline_in_textregion_index_to_del = np.array(textline_in_textregion_index_to_del) for ind_u_a_trs in np.unique(textregion_index_to_del): textline_in_textregion_index_to_del_ind = textline_in_textregion_index_to_del[textregion_index_to_del==ind_u_a_trs] textline_in_textregion_index_to_del_ind = np.sort(textline_in_textregion_index_to_del_ind)[::-1] for ittrd in textline_in_textregion_index_to_del_ind: contours[ind_u_a_trs].pop(ittrd) return contours def filter_contours_without_textline_inside( self, contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered, conf_contours_textregions): ###contours_txtline_of_all_textregions = [] ###for jj in range(len(contours_textline)): 
###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj] ###M_main_textline = [cv2.moments(contours_txtline_of_all_textregions[j]) ### for j in range(len(contours_txtline_of_all_textregions))] ###cx_main_textline = [(M_main_textline[j]["m10"] / (M_main_textline[j]["m00"] + 1e-32)) ### for j in range(len(M_main_textline))] ###cy_main_textline = [(M_main_textline[j]["m01"] / (M_main_textline[j]["m00"] + 1e-32)) ### for j in range(len(M_main_textline))] ###M_main = [cv2.moments(contours[j]) for j in range(len(contours))] ###cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] ###cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] ###contours_with_textline = [] ###for ind_tr, con_tr in enumerate(contours): ###results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False) ### for index_textline_con in range(len(contours_txtline_of_all_textregions)) ] ###results = np.array(results) ###if np.any(results==1): ###contours_with_textline.append(con_tr) textregion_index_to_del = [] for index_textregion, textlines_textregion in enumerate(contours_textline): if len(textlines_textregion)==0: textregion_index_to_del.append(index_textregion) uniqe_args_trs = np.unique(textregion_index_to_del) uniqe_args_trs_sorted = np.sort(uniqe_args_trs)[::-1] for ind_u_a_trs in uniqe_args_trs_sorted: conf_contours_textregions.pop(ind_u_a_trs) contours.pop(ind_u_a_trs) contours_textline.pop(ind_u_a_trs) text_con_org.pop(ind_u_a_trs) if len(contours_only_text_parent_d_ordered) > 0: contours_only_text_parent_d_ordered.pop(ind_u_a_trs) return contours, text_con_org, conf_contours_textregions, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours))) def dilate_textlines(self, all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): for i in range(len(all_found_textline_polygons[j])): con_ind = all_found_textline_polygons[j][i] con_ind = con_ind.astype(float) x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) x_min = float(np.min( con_ind[:,0,0] )) y_min = float(np.min( con_ind[:,0,1] )) x_max = float(np.max( con_ind[:,0,0] )) y_max = float(np.max( con_ind[:,0,1] )) if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) mult = x_biger_than_x*x_differential arg_min_mult = np.argmin(mult) arg_max_mult = np.argmax(mult) if y_differential[0]==0: y_differential[0] = 0.1 if y_differential[-1]==0: y_differential[-1]= 0.1 y_differential = [y_differential[ind] if y_differential[ind] != 0 else 0.5 * (y_differential[ind-1] + y_differential[ind+1]) for ind in range(len(y_differential))] if y_differential[0]==0.1: y_differential[0] = y_differential[1] if y_differential[-1]==0.1: y_differential[-1] = y_differential[-2] y_differential.append(y_differential[0]) y_differential = [-1 if y_differential[ind] < 0 else 1 for ind in range(len(y_differential))] y_differential = self.return_it_in_two_groups(y_differential) y_differential = np.array(y_differential) con_scaled = con_ind*1 con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 try: con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5 con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5 except: pass 
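                    # (added explanatory comment, not in the original source) This
                    # branch handles vertical-ish textlines (taller than wide and
                    # narrower than 70 px): every x coordinate is shifted by 8 px
                    # against the grouped sign of the local y step, and the
                    # extremal vertices get an extra +/-8 (or +/-5) px in y; the
                    # else branch below mirrors the same scheme for horizontal lines.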
con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8 con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8 try: con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5 con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5 except: pass else: y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) mult = y_biger_than_x*y_differential arg_min_mult = np.argmin(mult) arg_max_mult = np.argmax(mult) if x_differential[0]==0: x_differential[0] = 0.1 if x_differential[-1]==0: x_differential[-1]= 0.1 x_differential = [x_differential[ind] if x_differential[ind] != 0 else 0.5 * (x_differential[ind-1] + x_differential[ind+1]) for ind in range(len(x_differential))] if x_differential[0]==0.1: x_differential[0] = x_differential[1] if x_differential[-1]==0.1: x_differential[-1] = x_differential[-2] x_differential.append(x_differential[0]) x_differential = [-1 if x_differential[ind] < 0 else 1 for ind in range(len(x_differential))] x_differential = self.return_it_in_two_groups(x_differential) x_differential = np.array(x_differential) con_scaled = con_ind*1 con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 try: con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5 con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5 except: pass con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8 con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8 try: con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5 con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 except: pass con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 all_found_textline_polygons[j][i][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons def delete_regions_without_textlines( self, slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con): slopes_rem = [] all_found_textline_polygons_rem = [] boxes_text_rem = [] txt_con_org_rem = [] contours_only_text_parent_rem = [] index_by_text_par_con_rem = [] for i, ind_con in enumerate(all_found_textline_polygons): if len(ind_con): all_found_textline_polygons_rem.append(ind_con) slopes_rem.append(slopes[i]) boxes_text_rem.append(boxes_text[i]) txt_con_org_rem.append(txt_con_org[i]) contours_only_text_parent_rem.append(contours_only_text_parent[i]) index_by_text_par_con_rem.append(index_by_text_par_con[i]) index_sort = np.argsort(index_by_text_par_con_rem) indexes_new = np.array(range(len(index_by_text_par_con_rem))) index_by_text_par_con_rem_sort = [indexes_new[index_sort==j][0] for j in range(len(index_by_text_par_con_rem))] return (slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem, contours_only_text_parent_rem, index_by_text_par_con_rem_sort) def run(self, image_filename : Optional[str] = None, dir_in : Optional[str] = None, overwrite : bool = False): """ Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") t0_tot = time.time() if dir_in: self.ls_imgs = os.listdir(dir_in) elif image_filename: self.ls_imgs = [image_filename] else: raise ValueError("run requires either a single image filename or a directory") for img_filename in self.ls_imgs: self.logger.info(img_filename) t0 = time.time() self.reset_file_name_dir(os.path.join(dir_in 
or "", img_filename)) #print("text region early -11 in %.1fs", time.time() - t0) if os.path.exists(self.writer.output_filename): if overwrite: self.logger.warning("will overwrite existing output file '%s'", self.writer.output_filename) else: self.logger.warning("will skip input for existing output file '%s'", self.writer.output_filename) continue pcgts = self.run_single() self.logger.info("Job done in %.1fs", time.time() - t0) #print("Job done in %.1fs" % (time.time() - t0)) self.writer.write_pagexml(pcgts) if dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) print("all Job done in %.1fs", time.time() - t0_tot) def run_single(self): t0 = time.time() img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) if self.extract_only_images: text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \ self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) ocr_all_textlines = None pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], polygons_of_images, [], [], [], [], [], cont_page, [], [], ocr_all_textlines, []) if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) return pcgts if self.skip_layout_and_reading_order: _ ,_, _, textline_mask_tot_ea, img_bin_light, _ = \ self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = \ self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) all_found_textline_polygons = filter_contours_area_of_image( textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) all_found_textline_polygons=[ all_found_textline_polygons ] all_found_textline_polygons = self.dilate_textregions_contours_textline_version( all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( all_found_textline_polygons, None, textline_mask_tot_ea, type_contour="textline") order_text_new = [0] slopes =[0] id_of_texts_tot =['region_0001'] polygons_of_images = [] slopes_marginals = [] polygons_of_marginals = [] all_found_textline_polygons_marginals = [] all_box_coord_marginals = [] polygons_lines_xml = [] contours_tables = [] ocr_all_textlines = None conf_contours_textregions =None pcgts = self.writer.build_pagexml_no_full_layout( cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions) return pcgts #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if self.light_version: text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light, confidence_matrix = \ self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) #print("text region early -2 in %.1fs", time.time() - t0) if num_col_classifier == 1 or num_col_classifier ==2: if num_col_classifier == 1: img_w_new = 1000 
else: img_w_new = 1300 img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1] textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) else: slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) #self.logger.info("run graphics %.1fs ", time.time() - t1t) #print("text region early -3 in %.1fs", time.time() - t0) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) #print("text region early -4 in %.1fs", time.time() - t0) else: text_regions_p_1 ,erosion_hurts, polygons_lines_xml = \ self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) confidence_matrix = np.zeros((text_regions_p_1.shape[:2])) t1 = time.time() num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ text_regions_p_1, cont_page, table_prediction = \ self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) self.logger.info("Graphics detection took %.1fs ", time.time() - t1) #self.logger.info('cont_page %s', cont_page) #plt.imshow(table_prediction) #plt.show() if not num_col: self.logger.info("No columns detected, outputting an empty PAGE-XML") ocr_all_textlines = None pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines, []) return pcgts #print("text region early in %.1fs", time.time() - t0) t1 = time.time() if not self.light_version: textline_mask_tot_ea = self.run_textline(image_page) self.logger.info("textline detection took %.1fs", time.time() - t1) t1 = time.time() slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) self.logger.info("deskewing took %.1fs", time.time() - t1) elif num_col_classifier in (1,2): org_h_l_m = textline_mask_tot_ea.shape[0] org_w_l_m = textline_mask_tot_ea.shape[1] if num_col_classifier == 1: img_w_new = 2000 else: img_w_new = 2400 img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1] image_page = resize_image(image_page,img_h_new, img_w_new ) textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) mask_images = resize_image(mask_images,img_h_new, img_w_new ) mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) textline_mask_tot, text_regions_p, image_page_rotated = \ self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) if self.light_version and num_col_classifier in (1,2): image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) textline_mask_tot = 
resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) self.logger.info("detection of marginals took %.1fs", time.time() - t1) #print("text region early 2 marginal in %.1fs", time.time() - t0) ## birdan sora chock chakir t1 = time.time() if not self.full_layout: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \ boxes, boxes_d, polygons_of_marginals, contours_tables = \ self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) else: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \ regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) if self.light_version: drop_label_in_full_layout = 4 textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 #print("text region early 2 in %.1fs", time.time() - t0) ###min_con_area = 0.000005 contours_only_text, hir_on_text = return_contours_of_image(text_only) contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) if len(contours_only_text_parent) > 0: areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) #self.logger.info('areas_cnt_text %s', areas_cnt_text) contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) contours_only_text_parent = self.return_list_of_contours_with_desired_order( contours_only_text_parent, index_con_parents) ##try: ##contours_only_text_parent = \ ##list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) ##except: ##contours_only_text_parent = \ ##list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) areas_cnt_text_parent = self.return_list_of_contours_with_desired_order( areas_cnt_text_parent, index_con_parents) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) areas_cnt_text_d = areas_cnt_text_d / 
float(text_only_d.shape[0] * text_only_d.shape[1]) if len(areas_cnt_text_d)>0: contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] index_con_parents_d = np.argsort(areas_cnt_text_d) contours_only_text_parent_d = self.return_list_of_contours_with_desired_order( contours_only_text_parent_d, index_con_parents_d) #try: #contours_only_text_parent_d = \ #list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) #except: #contours_only_text_parent_d = \ #list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) areas_cnt_text_d = self.return_list_of_contours_with_desired_order( areas_cnt_text_d, index_con_parents_d) cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) try: if len(cx_bigest_d) >= 5: cx_bigest_d_last5 = cx_bigest_d[-5:] cy_biggest_d_last5 = cy_biggest_d[-5:] dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) else: cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) cx_bigest_d_big[0] = cx_bigest_d[ind_largest] cy_biggest_d_big[0] = cy_biggest_d[ind_largest] except Exception as why: self.logger.error(why) (h, w) = text_only.shape[:2] center = (w // 2.0, h // 2.0) M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) M_22 = np.array(M)[:2, :2] p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) x_diff = p_big[0] - cx_bigest_d_big y_diff = p_big[1] - cy_biggest_d_big contours_only_text_parent_d_ordered = [] for i in range(len(contours_only_text_parent)): p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) p[0] = p[0] - x_diff[0] p[1] = p[1] - y_diff[0] dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) # plt.imshow(img2[:,:,0]) # plt.show() else: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] contours_only_text_parent = [] else: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] #contours_only_text_parent = [] if not len(contours_only_text_parent): # stop early empty_marginals = [[]] * len(polygons_of_marginals) if self.full_layout: pcgts = self.writer.build_pagexml_full_layout( [], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], cont_page, polygons_lines_xml, [], [], []) else: pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], cont_page, polygons_lines_xml, contours_tables, [], []) return pcgts #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: contours_only_text_parent 
= self.dilate_textregions_contours( contours_only_text_parent) contours_only_text_parent , contours_only_text_parent_d_ordered = self.filter_contours_inside_a_bigger_one( contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, marginal_cnts=polygons_of_marginals) #print("text region early 3.5 in %.1fs", time.time() - t0) txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light( contours_only_text_parent, self.image, slope_first, confidence_matrix, map=self.executor.map) #txt_con_org = self.dilate_textregions_contours(txt_con_org) #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light( contours_only_text_parent, self.image, slope_first, confidence_matrix, map=self.executor.map) #print("text region early 4 in %.1fs", time.time() - t0) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) #print("text region early 5 in %.1fs", time.time() - t0) ## birdan sora chock chakir if not self.curved_line: if self.light_version: if self.textline_light: all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light2( txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light2( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, # boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \ # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, # boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) all_found_textline_polygons = self.dilate_textregions_contours_textline_version( all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( all_found_textline_polygons, None, textline_mask_tot_ea_org, type_contour="textline") all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version( all_found_textline_polygons_marginals) contours_only_text_parent, txt_con_org, conf_contours_textregions, all_found_textline_polygons, contours_only_text_parent_d_ordered, \ index_by_text_par_con = self.filter_contours_without_textline_inside( contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, conf_contours_textregions) else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \ index_by_text_par_con, slopes = 
                    all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \
                        index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light(
                            txt_con_org, contours_only_text_parent, textline_mask_tot_ea,
                            image_page_rotated, boxes_text, slope_deskew)
                    all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                        all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light(
                            polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea,
                            image_page_rotated, boxes_marginals, slope_deskew)
                    #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                    #    all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
            else:
                textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
                all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \
                    all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new(
                        txt_con_org, contours_only_text_parent, textline_mask_tot_ea,
                        image_page_rotated, boxes_text, slope_deskew)
                all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                    all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new(
                        polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea,
                        image_page_rotated, boxes_marginals, slope_deskew)
        else:
            scale_param = 1
            textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2)
            all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \
                all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(
                    txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode,
                    image_page_rotated, boxes_text, text_only,
                    num_col_classifier, scale_param, slope_deskew)
            all_found_textline_polygons = small_textlines_to_parent_adherence2(
                all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier)
            all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(
                    polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode,
                    image_page_rotated, boxes_marginals, text_only,
                    num_col_classifier, scale_param, slope_deskew)
            all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(
                all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)
        #print("text region early 6 in %.1fs", time.time() - t0)
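        # Full layout: split detected regions into main text vs. headings and keep
        # the per-region lists (boxes, textlines, slopes, confidences) in sync.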
        if self.full_layout:
            if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
                contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(
                    contours_only_text_parent_d_ordered, index_by_text_par_con)
                #try:
                #    contours_only_text_parent_d_ordered = \
                #        list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con])
                #except:
                #    contours_only_text_parent_d_ordered = \
                #        list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con])
            else:
                #takes long time
                contours_only_text_parent_d_ordered = None
            if self.light_version:
                fun = check_any_text_region_in_model_one_is_main_or_header_light
            else:
                fun = check_any_text_region_in_model_one_is_main_or_header
            text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \
                all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \
                contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, \
                conf_contours_textregions, conf_contours_textregions_h = fun(
                    text_regions_p, regions_fully, contours_only_text_parent,
                    all_box_coord, all_found_textline_polygons, slopes,
                    contours_only_text_parent_d_ordered, conf_contours_textregions)

            if self.plotter:
                self.plotter.save_plot_of_layout(text_regions_p, image_page)
                self.plotter.save_plot_of_layout_all(text_regions_p, image_page)

            pixel_img = 4
            polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img)
            all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(
                text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h,
                all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h,
                kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light)

            if not self.reading_order_machine_based:
                pixel_seps = 6
                if not self.headers_off:
                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                        num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(
                            np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2),
                            num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h)
                    else:
                        _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
                            np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),
                            num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h_d_ordered)
                elif self.headers_off:
                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                        num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(
                            np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2),
                            num_col_classifier, self.tables, pixel_seps)
                    else:
                        _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
                            np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),
                            num_col_classifier, self.tables, pixel_seps)

                if num_col_classifier >= 3:
                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                        regions_without_separators = regions_without_separators.astype(np.uint8)
                        regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6)
                    else:
                        regions_without_separators_d = regions_without_separators_d.astype(np.uint8)
                        regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)

                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                    boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(
                        splitter_y_new, regions_without_separators, matrix_of_lines_ch,
                        num_col_classifier, erosion_hurts, self.tables, self.right2left)
                else:
                    boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(
                        splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d,
                        num_col_classifier, erosion_hurts, self.tables, self.right2left)

        if self.plotter:
            self.plotter.write_images_into_directory(polygons_of_images, image_page)
        t_order = time.time()
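        # Reading order: either predicted by the dedicated ordering model or
        # derived from the column boxes computed above.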
        if self.full_layout:
            if self.reading_order_machine_based:
                order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(
                    contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
            else:
                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                    order_text_new, id_of_texts_tot = self.do_order_of_regions(
                        contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
                else:
                    order_text_new, id_of_texts_tot = self.do_order_of_regions(
                        contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered,
                        boxes_d, textline_mask_tot_d)
            self.logger.info("detection of reading order took %.1fs", time.time() - t_order)

            if self.ocr:
                ocr_all_textlines = []
            else:
                ocr_all_textlines = None
            pcgts = self.writer.build_pagexml_full_layout(
                contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot,
                all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h,
                polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals,
                all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals,
                cont_page, polygons_lines_xml, ocr_all_textlines, conf_contours_textregions, conf_contours_textregions_h)
            return pcgts

        contours_only_text_parent_h = None
        if self.reading_order_machine_based:
            order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(
                contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
        else:
            if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                order_text_new, id_of_texts_tot = self.do_order_of_regions(
                    contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
            else:
                contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(
                    contours_only_text_parent_d_ordered, index_by_text_par_con)
                #try:
                #    contours_only_text_parent_d_ordered = \
                #        list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con])
                #except:
                #    contours_only_text_parent_d_ordered = \
                #        list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con])
                order_text_new, id_of_texts_tot = self.do_order_of_regions(
                    contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d)

        if self.ocr:
            device = cuda.get_current_device()
            device.reset()
            gc.collect()
            model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
            torch.cuda.empty_cache()
            model_ocr.to(device)

            ind_tot = 0
            #cv2.imwrite('./img_out.png', image_page)
            ocr_all_textlines = []
            for indexing, ind_poly_first in enumerate(all_found_textline_polygons):
                ocr_textline_in_textregion = []
                for indexing2, ind_poly in enumerate(ind_poly_first):
                    if not (self.textline_light or self.curved_line):
                        ind_poly = copy.deepcopy(ind_poly)
                        box_ind = all_box_coord[indexing]
                        #print(ind_poly, np.shape(ind_poly), 'ind_poly')
                        #print(box_ind)
                        ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind)
                        #print(ind_poly_copy)
                        ind_poly[ind_poly < 0] = 0
                    x, y, w, h = cv2.boundingRect(ind_poly)
                    #print(ind_poly_copy, np.shape(ind_poly_copy))
                    #print(x, y, w, h, h/float(w), 'ratio')
                    h2w_ratio = h / float(w)
                    mask_poly = np.zeros(image_page.shape)
                    if not self.light_version:
                        img_poly_on_img = np.copy(image_page)
                    else:
                        img_poly_on_img = np.copy(img_bin_light)
                    mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1))
                    if self.textline_light:
                        mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1)
                    img_poly_on_img[:, :, 0][mask_poly[:, :, 0] == 0] = 255
                    img_poly_on_img[:, :, 1][mask_poly[:, :, 0] == 0] = 255
                    img_poly_on_img[:, :, 2][mask_poly[:, :, 0] == 0] = 255

                    img_croped = img_poly_on_img[y:y+h, x:x+w, :]
                    #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped)
                    text_ocr = self.return_ocr_of_textline_without_common_section(
                        img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot)
                    ocr_textline_in_textregion.append(text_ocr)
                    ind_tot = ind_tot + 1
                ocr_all_textlines.append(ocr_textline_in_textregion)
        else:
            ocr_all_textlines = None
        #print(ocr_all_textlines)
        self.logger.info("detection of reading order took %.1fs", time.time() - t_order)
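        # Serialize regions, lines, marginalia, separators and (optionally) OCR
        # text plus region confidences into the PAGE-XML result.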
        pcgts = self.writer.build_pagexml_no_full_layout(
            txt_con_org, page_coord, order_text_new, id_of_texts_tot,
            all_found_textline_polygons, all_box_coord, polygons_of_images,
            polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals,
            slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables,
            ocr_all_textlines, conf_contours_textregions)
        return pcgts


class Eynollah_ocr:
    def __init__(
            self,
            dir_models,
            dir_xmls=None,
            dir_in=None,
            dir_in_bin=None,
            dir_out=None,
            dir_out_image_text=None,
            tr_ocr=False,
            batch_size=None,
            export_textline_images_and_text=False,
            do_not_mask_with_textline_contour=False,
            draw_texts_on_image=False,
            prediction_with_both_of_rgb_and_bin=False,
            logger=None,
    ):
        self.dir_in = dir_in
        self.dir_in_bin = dir_in_bin
        self.dir_out = dir_out
        self.dir_xmls = dir_xmls
        self.dir_models = dir_models
        self.tr_ocr = tr_ocr
        self.export_textline_images_and_text = export_textline_images_and_text
        self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour
        self.draw_texts_on_image = draw_texts_on_image
        self.dir_out_image_text = dir_out_image_text
        self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin
        if tr_ocr:
            self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
            self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
            self.model_ocr.to(self.device)
            if not batch_size:
                self.b_s = 2
            else:
                self.b_s = int(batch_size)
        else:
            self.model_ocr_dir = dir_models + "/model_step_1050000_ocr"  #"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
            model_ocr = load_model(self.model_ocr_dir, compile=False)
            self.prediction_model = tf.keras.models.Model(
                model_ocr.get_layer(name="image").input,
                model_ocr.get_layer(name="dense2").output)
            if not batch_size:
                self.b_s = 8
            else:
                self.b_s = int(batch_size)

            with open(os.path.join(self.model_ocr_dir, "characters_org.txt"), "r") as config_file:
                characters = json.load(config_file)

            AUTOTUNE = tf.data.AUTOTUNE

            # Mapping characters to integers.
            char_to_num = StringLookup(vocabulary=list(characters), mask_token=None)

            # Mapping integers back to original characters.
            self.num_to_char = StringLookup(
                vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
            )
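    # CTC decoding turns the recognizer's (batch, time, vocab) softmax output
    # into strings: repeated symbols and CTC blanks are collapsed, then the
    # surviving indices are mapped back to characters. Illustrative example
    # (hypothetical per-step argmax): [a, a, <blank>, b] -> "ab".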
    def decode_batch_predictions(self, pred, max_len=128):
        # input_len is the product of the batch size and the
        # number of time steps.
        input_len = np.ones(pred.shape[0]) * pred.shape[1]

        # Decode CTC predictions using greedy search.
        # decoded is a tuple with 2 elements.
        decoded = tf.keras.backend.ctc_decode(pred, input_length=input_len, beam_width=100)
        # The outputs are in the first element of the tuple.
        # Additionally, the first element is actually a list,
        # therefore we take the first element of that list as well.
        #print(decoded, 'decoded')
        decoded = decoded[0][0][:, :max_len]

        #print(decoded, decoded.shape, 'decoded')
        output = []
        for d in decoded:
            # Convert the predicted indices to the corresponding chars.
            d = tf.strings.reduce_join(self.num_to_char(d))
            d = d.numpy().decode("utf-8")
            output.append(d)
        return output

    def distortion_free_resize(self, image, img_size):
        w, h = img_size
        image = tf.image.resize(image, size=(h, w), preserve_aspect_ratio=True)

        # Check the amount of padding needed.
        pad_height = h - tf.shape(image)[0]
        pad_width = w - tf.shape(image)[1]

        # Only necessary if you want the same amount of padding on both sides.
        if pad_height % 2 != 0:
            height = pad_height // 2
            pad_height_top = height + 1
            pad_height_bottom = height
        else:
            pad_height_top = pad_height_bottom = pad_height // 2

        if pad_width % 2 != 0:
            width = pad_width // 2
            pad_width_left = width + 1
            pad_width_right = width
        else:
            pad_width_left = pad_width_right = pad_width // 2

        image = tf.pad(
            image,
            paddings=[
                [pad_height_top, pad_height_bottom],
                [pad_width_left, pad_width_right],
                [0, 0],
            ],
        )

        image = tf.transpose(image, (1, 0, 2))
        image = tf.image.flip_left_right(image)
        return image

    def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image):
        width = np.shape(textline_image)[1]
        height = np.shape(textline_image)[0]
        common_window = int(0.22 * width)

        width1 = int(width / 2. - common_window)
        width2 = int(width / 2. + common_window)

        img_sum = np.sum(textline_image[:, :, 0], axis=0)
        sum_smoothed = gaussian_filter1d(img_sum, 3)

        peaks_real, _ = find_peaks(sum_smoothed, height=0)
        if len(peaks_real) > 35:
            #peaks_real = peaks_real[(peaks_real < width2) & (peaks_real > width1)]
            argsort = np.argsort(sum_smoothed[peaks_real])[::-1]
            peaks_real_top_six = peaks_real[argsort[:6]]
            midpoint = textline_image.shape[1] / 2.
            arg_closest = np.argmin(np.abs(peaks_real_top_six - midpoint))
            #arg_max = np.argmax(sum_smoothed[peaks_real])
            peaks_final = peaks_real_top_six[arg_closest]  #peaks_real[arg_max]
            return peaks_final
        else:
            return None

    # Function to fit text inside the given area
    def fit_text_single_line(self, draw, text, font_path, max_width, max_height):
        initial_font_size = 50
        font_size = initial_font_size
        while font_size > 10:  # Minimum font size
            font = ImageFont.truetype(font_path, font_size)
            text_bbox = draw.textbbox((0, 0), text, font=font)  # Get text bounding box
            text_width = text_bbox[2] - text_bbox[0]
            text_height = text_bbox[3] - text_bbox[1]

            if text_width <= max_width and text_height <= max_height:
                return font  # Return the best-fitting font

            font_size -= 2  # Reduce font size and retry

        return ImageFont.truetype(font_path, 10)  # Smallest font fallback

    def return_textlines_split_if_needed(self, textline_image, textline_image_bin):
        split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section(
            textline_image)
        if split_point:
            image1 = textline_image[:, :split_point, :]  # image.crop((0, 0, width2, height))
            image2 = textline_image[:, split_point:, :]  # image.crop((width1, 0, width, height))
            if self.prediction_with_both_of_rgb_and_bin:
                image1_bin = textline_image_bin[:, :split_point, :]
                image2_bin = textline_image_bin[:, split_point:, :]
                return [image1, image2], [image1_bin, image2_bin]
            else:
                return [image1, image2], None
        else:
            return None, None

    def preprocess_and_resize_image_for_ocrcnn_model(self, img, image_height, image_width):
        ratio = image_height / float(img.shape[0])
        w_ratio = int(ratio * img.shape[1])
        if w_ratio <= image_width:
            width_new = w_ratio
        else:
            width_new = image_width
        if width_new == 0:
            width_new = img.shape[1]

        ##if width_new + 32 >= image_width:
        ##    width_new = width_new - 32
        ###patch_zero = np.zeros((32, 32, 3))  #+255
        ###patch_zero[9:19, 8:18, :] = 0

        img = resize_image(img, image_height, width_new)
        img_fin = np.ones((image_height, image_width, 3)) * 255
        ###img_fin[:, :32, :] = patch_zero[:, :, :]
        ###img_fin[:, 32:32+width_new, :] = img[:, :, :]
        img_fin[:, :width_new, :] = img[:, :, :]
        img_fin = img_fin / 255.
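        # The line image now sits left-aligned on a white canvas of fixed size
        # (image_height, image_width), scaled to [0, 1].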
        return img_fin

    def run(self):
        ls_imgs = os.listdir(self.dir_in)

        if self.tr_ocr:
            tr_ocr_input_height_and_width = 384
            for ind_img in ls_imgs:
                file_name = Path(ind_img).stem
                dir_img = os.path.join(self.dir_in, ind_img)
                dir_xml = os.path.join(self.dir_xmls, file_name + '.xml')
                out_file_ocr = os.path.join(self.dir_out, file_name + '.xml')
                img = cv2.imread(dir_img)

                if self.draw_texts_on_image:
                    out_image_with_text = os.path.join(self.dir_out_image_text, file_name + '.png')
                    image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
                    draw = ImageDraw.Draw(image_text)
                    total_bb_coordinates = []

                ##file_name = Path(dir_xmls).stem
                tree1 = ET.parse(dir_xml, parser=ET.XMLParser(encoding="utf-8"))
                root1 = tree1.getroot()
                alltags = [elem.tag for elem in root1.iter()]
                link = alltags[0].split('}')[0] + '}'
                name_space = alltags[0].split('}')[0]
                name_space = name_space.split('{')[1]
                region_tags = np.unique([x for x in alltags if x.endswith('TextRegion')])

                cropped_lines = []
                cropped_lines_region_indexer = []
                cropped_lines_meging_indexing = []

                indexer_text_region = 0
                for nn in root1.iter(region_tags):
                    for child_textregion in nn:
                        if child_textregion.tag.endswith("TextLine"):
                            for child_textlines in child_textregion:
                                if child_textlines.tag.endswith("Coords"):
                                    cropped_lines_region_indexer.append(indexer_text_region)
                                    p_h = child_textlines.attrib['points'].split(' ')
                                    textline_coords = np.array(
                                        [[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])
                                    x, y, w, h = cv2.boundingRect(textline_coords)
                                    if self.draw_texts_on_image:
                                        total_bb_coordinates.append([x, y, w, h])
                                    h2w_ratio = h / float(w)

                                    img_poly_on_img = np.copy(img)
                                    mask_poly = np.zeros(img.shape)
                                    mask_poly = cv2.fillPoly(mask_poly, pts=[textline_coords], color=(1, 1, 1))
                                    mask_poly = mask_poly[y:y+h, x:x+w, :]
                                    img_crop = img_poly_on_img[y:y+h, x:x+w, :]
                                    img_crop[mask_poly == 0] = 255

                                    if h2w_ratio > 0.1:
                                        cropped_lines.append(resize_image(
                                            img_crop, tr_ocr_input_height_and_width, tr_ocr_input_height_and_width))
                                        cropped_lines_meging_indexing.append(0)
                                    else:
                                        splited_images, _ = self.return_textlines_split_if_needed(img_crop, None)
                                        #print(splited_images)
                                        if splited_images:
                                            cropped_lines.append(resize_image(
                                                splited_images[0], tr_ocr_input_height_and_width,
                                                tr_ocr_input_height_and_width))
                                            cropped_lines_meging_indexing.append(1)
                                            cropped_lines.append(resize_image(
                                                splited_images[1], tr_ocr_input_height_and_width,
                                                tr_ocr_input_height_and_width))
                                            cropped_lines_meging_indexing.append(-1)
                                        else:
                                            cropped_lines.append(img_crop)
                                            cropped_lines_meging_indexing.append(0)
                    indexer_text_region = indexer_text_region + 1

                extracted_texts = []
                n_iterations = math.ceil(len(cropped_lines) / self.b_s)
                for i in range(n_iterations):
                    if i == (n_iterations - 1):
                        n_start = i * self.b_s
                        imgs = cropped_lines[n_start:]
                    else:
                        n_start = i * self.b_s
                        n_end = (i + 1) * self.b_s
                        imgs = cropped_lines[n_start:n_end]
                    pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
                    generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
                    generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
                    extracted_texts = extracted_texts + generated_text_merged

                del cropped_lines
                gc.collect()

                extracted_texts_merged = [
                    extracted_texts[ind] if cropped_lines_meging_indexing[ind] == 0
                    else extracted_texts[ind] + " " + extracted_texts[ind + 1] if cropped_lines_meging_indexing[ind] == 1
                    else None
                    for ind in range(len(cropped_lines_meging_indexing))]
                extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
                #print(extracted_texts_merged, len(extracted_texts_merged))
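                # Merge-marker semantics: 0 = line recognized as a whole,
                # 1 = first half of a split line (joined with the next entry),
                # -1 = second half (already consumed by the preceding join).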
                unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)

                if self.draw_texts_on_image:
                    font_path = "NotoSans-Regular.ttf"  # Make sure this file exists!
                    font = ImageFont.truetype(font_path, 40)
                    for indexer_text, bb_ind in enumerate(total_bb_coordinates):
                        x_bb = bb_ind[0]
                        y_bb = bb_ind[1]
                        w_bb = bb_ind[2]
                        h_bb = bb_ind[3]
                        font = self.fit_text_single_line(
                            draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb * 0.4))
                        ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
                        text_bbox = draw.textbbox((0, 0), extracted_texts_merged[indexer_text], font=font)
                        text_width = text_bbox[2] - text_bbox[0]
                        text_height = text_bbox[3] - text_bbox[1]
                        text_x = x_bb + (w_bb - text_width) // 2   # Center horizontally
                        text_y = y_bb + (h_bb - text_height) // 2  # Center vertically
                        # Draw the text
                        draw.text((text_x, text_y), extracted_texts_merged[indexer_text], fill="black", font=font)
                    image_text.save(out_image_with_text)
                #print(len(unique_cropped_lines_region_indexer), 'unique_cropped_lines_region_indexer')

                text_by_textregion = []
                for ind in unique_cropped_lines_region_indexer:
                    extracted_texts_merged_un = np.array(extracted_texts_merged)[
                        np.array(cropped_lines_region_indexer) == ind]
                    text_by_textregion.append(" ".join(extracted_texts_merged_un))
                #print(len(text_by_textregion), indexer_text_region, "text_by_textregion")
                #print(time.time() - t0, 'elapsed time')

                indexer = 0
                indexer_textregion = 0
                for nn in root1.iter(region_tags):
                    text_subelement_textregion = ET.SubElement(nn, 'TextEquiv')
                    unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode')

                    has_textline = False
                    for child_textregion in nn:
                        if child_textregion.tag.endswith("TextLine"):
                            text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
                            unicode_textline = ET.SubElement(text_subelement, 'Unicode')
                            unicode_textline.text = extracted_texts_merged[indexer]
                            indexer = indexer + 1
                            has_textline = True
                    if has_textline:
                        unicode_textregion.text = text_by_textregion[indexer_textregion]
                        indexer_textregion = indexer_textregion + 1

                ET.register_namespace("", name_space)
                tree1.write(out_file_ocr, xml_declaration=True, method='xml', encoding="utf8", default_namespace=None)
            #print("Job done in %.1fs", time.time() - t0)
        else:
            max_len = 512
            padding_token = 299
            image_width = 512  #max_len * 4
            image_height = 32
            img_size = (image_width, image_height)

            for ind_img in ls_imgs:
                file_name = Path(ind_img).stem
                dir_img = os.path.join(self.dir_in, ind_img)
                dir_xml = os.path.join(self.dir_xmls, file_name + '.xml')
                out_file_ocr = os.path.join(self.dir_out, file_name + '.xml')
                img = cv2.imread(dir_img)
                if self.prediction_with_both_of_rgb_and_bin:
                    cropped_lines_bin = []
                    dir_img_bin = os.path.join(self.dir_in_bin, file_name + '.png')
                    img_bin = cv2.imread(dir_img_bin)

                if self.draw_texts_on_image:
                    out_image_with_text = os.path.join(self.dir_out_image_text, file_name + '.png')
                    image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
                    draw = ImageDraw.Draw(image_text)
                    total_bb_coordinates = []

                tree1 = ET.parse(dir_xml, parser=ET.XMLParser(encoding="utf-8"))
                root1 = tree1.getroot()
                alltags = [elem.tag for elem in root1.iter()]
                link = alltags[0].split('}')[0] + '}'
                name_space = alltags[0].split('}')[0]
                name_space = name_space.split('{')[1]
                region_tags = np.unique([x for x in alltags if x.endswith('TextRegion')])

                cropped_lines = []
                cropped_lines_region_indexer = []
                cropped_lines_meging_indexing = []

                tinl = time.time()
                indexer_text_region = 0
                indexer_textlines = 0
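                # Crop every TextLine's bounding box from the page image and white
                # out the pixels outside the line polygon, so the recognizer only
                # sees the line itself.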
                for nn in root1.iter(region_tags):
                    for child_textregion in nn:
                        if child_textregion.tag.endswith("TextLine"):
                            for child_textlines in child_textregion:
                                if child_textlines.tag.endswith("Coords"):
                                    cropped_lines_region_indexer.append(indexer_text_region)
                                    p_h = child_textlines.attrib['points'].split(' ')
                                    textline_coords = np.array(
                                        [[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])
                                    x, y, w, h = cv2.boundingRect(textline_coords)

                                    if self.draw_texts_on_image:
                                        total_bb_coordinates.append([x, y, w, h])

                                    w_scaled = w * image_height / float(h)

                                    img_poly_on_img = np.copy(img)
                                    if self.prediction_with_both_of_rgb_and_bin:
                                        img_poly_on_img_bin = np.copy(img_bin)
                                        img_crop_bin = img_poly_on_img_bin[y:y+h, x:x+w, :]

                                    mask_poly = np.zeros(img.shape)
                                    mask_poly = cv2.fillPoly(mask_poly, pts=[textline_coords], color=(1, 1, 1))
                                    mask_poly = mask_poly[y:y+h, x:x+w, :]
                                    img_crop = img_poly_on_img[y:y+h, x:x+w, :]
                                    if not self.do_not_mask_with_textline_contour:
                                        img_crop[mask_poly == 0] = 255
                                        if self.prediction_with_both_of_rgb_and_bin:
                                            img_crop_bin[mask_poly == 0] = 255

                                    if not self.export_textline_images_and_text:
                                        if w_scaled < 1.5 * image_width:
                                            img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(
                                                img_crop, image_height, image_width)
                                            cropped_lines.append(img_fin)
                                            cropped_lines_meging_indexing.append(0)
                                            if self.prediction_with_both_of_rgb_and_bin:
                                                img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(
                                                    img_crop_bin, image_height, image_width)
                                                cropped_lines_bin.append(img_fin)
                                        else:
                                            if self.prediction_with_both_of_rgb_and_bin:
                                                splited_images, splited_images_bin = \
                                                    self.return_textlines_split_if_needed(img_crop, img_crop_bin)
                                            else:
                                                splited_images, splited_images_bin = \
                                                    self.return_textlines_split_if_needed(img_crop, None)
                                            if splited_images:
                                                img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(
                                                    splited_images[0], image_height, image_width)
                                                cropped_lines.append(img_fin)
                                                cropped_lines_meging_indexing.append(1)
                                                img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(
                                                    splited_images[1], image_height, image_width)
                                                cropped_lines.append(img_fin)
                                                cropped_lines_meging_indexing.append(-1)
                                                if self.prediction_with_both_of_rgb_and_bin:
                                                    img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(
                                                        splited_images_bin[0], image_height, image_width)
                                                    cropped_lines_bin.append(img_fin)
                                                    img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(
                                                        splited_images_bin[1], image_height, image_width)
                                                    cropped_lines_bin.append(img_fin)
                                            else:
                                                img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(
                                                    img_crop, image_height, image_width)
                                                cropped_lines.append(img_fin)
                                                cropped_lines_meging_indexing.append(0)
                                                if self.prediction_with_both_of_rgb_and_bin:
                                                    img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(
                                                        img_crop_bin, image_height, image_width)
                                                    cropped_lines_bin.append(img_fin)

                                if self.export_textline_images_and_text:
                                    if child_textlines.tag.endswith("TextEquiv"):
                                        for child_text in child_textlines:
                                            if child_text.tag.endswith("Unicode"):
                                                textline_text = child_text.text
                                                if textline_text:
                                                    with open(os.path.join(
                                                            self.dir_out,
                                                            file_name + '_line_' + str(indexer_textlines) + '.txt'),
                                                            'w') as text_file:
                                                        text_file.write(textline_text)
                                                    cv2.imwrite(os.path.join(
                                                        self.dir_out,
                                                        file_name + '_line_' + str(indexer_textlines) + '.png'),
                                                        img_crop)
                                                    indexer_textlines += 1

                    if not self.export_textline_images_and_text:
                        indexer_text_region = indexer_text_region + 1
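                # Batched inference: lines go through the CNN/RNN recognizer in
                # chunks of b_s; with prediction_with_both_of_rgb_and_bin the RGB
                # and binarized predictions are averaged before CTC decoding.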
                if not self.export_textline_images_and_text:
                    extracted_texts = []
                    n_iterations = math.ceil(len(cropped_lines) / self.b_s)
                    for i in range(n_iterations):
                        if i == (n_iterations - 1):
                            n_start = i * self.b_s
                            imgs = cropped_lines[n_start:]
                            imgs = np.array(imgs)
                            imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3)
                            if self.prediction_with_both_of_rgb_and_bin:
                                imgs_bin = cropped_lines_bin[n_start:]
                                imgs_bin = np.array(imgs_bin)
                                imgs_bin = imgs_bin.reshape(imgs_bin.shape[0], image_height, image_width, 3)
                        else:
                            n_start = i * self.b_s
                            n_end = (i + 1) * self.b_s
                            imgs = cropped_lines[n_start:n_end]
                            imgs = np.array(imgs).reshape(self.b_s, image_height, image_width, 3)
                            if self.prediction_with_both_of_rgb_and_bin:
                                imgs_bin = cropped_lines_bin[n_start:n_end]
                                imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3)

                        preds = self.prediction_model.predict(imgs, verbose=0)
                        if self.prediction_with_both_of_rgb_and_bin:
                            preds_bin = self.prediction_model.predict(imgs_bin, verbose=0)
                            preds = (preds + preds_bin) / 2.

                        pred_texts = self.decode_batch_predictions(preds)

                        for ib in range(imgs.shape[0]):
                            pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
                            extracted_texts.append(pred_texts_ib)

                    del cropped_lines
                    if self.prediction_with_both_of_rgb_and_bin:
                        del cropped_lines_bin
                    gc.collect()

                    extracted_texts_merged = [
                        extracted_texts[ind] if cropped_lines_meging_indexing[ind] == 0
                        else extracted_texts[ind] + " " + extracted_texts[ind + 1] if cropped_lines_meging_indexing[ind] == 1
                        else None
                        for ind in range(len(cropped_lines_meging_indexing))]
                    extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]

                    unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)

                    if self.draw_texts_on_image:
                        font_path = "NotoSans-Regular.ttf"  # Make sure this file exists!
                        font = ImageFont.truetype(font_path, 40)
                        for indexer_text, bb_ind in enumerate(total_bb_coordinates):
                            x_bb = bb_ind[0]
                            y_bb = bb_ind[1]
                            w_bb = bb_ind[2]
                            h_bb = bb_ind[3]
                            font = self.fit_text_single_line(
                                draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb * 0.4))
                            ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
                            text_bbox = draw.textbbox((0, 0), extracted_texts_merged[indexer_text], font=font)
                            text_width = text_bbox[2] - text_bbox[0]
                            text_height = text_bbox[3] - text_bbox[1]
                            text_x = x_bb + (w_bb - text_width) // 2   # Center horizontally
                            text_y = y_bb + (h_bb - text_height) // 2  # Center vertically
                            # Draw the text
                            draw.text((text_x, text_y), extracted_texts_merged[indexer_text], fill="black", font=font)
                        image_text.save(out_image_with_text)

                    text_by_textregion = []
                    for ind in unique_cropped_lines_region_indexer:
                        extracted_texts_merged_un = np.array(extracted_texts_merged)[
                            np.array(cropped_lines_region_indexer) == ind]
                        # join line texts with a space, as in the TrOCR branch above
                        text_by_textregion.append(" ".join(extracted_texts_merged_un))
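                    # Write the recognized text back into the PAGE-XML tree,
                    # reusing an existing TextEquiv/Unicode element where present
                    # and creating one otherwise.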
                    indexer = 0
                    indexer_textregion = 0
                    for nn in root1.iter(region_tags):
                        is_textregion_text = False
                        for childtest in nn:
                            if childtest.tag.endswith("TextEquiv"):
                                is_textregion_text = True

                        if not is_textregion_text:
                            text_subelement_textregion = ET.SubElement(nn, 'TextEquiv')
                            unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode')

                        has_textline = False
                        for child_textregion in nn:
                            if child_textregion.tag.endswith("TextLine"):
                                is_textline_text = False
                                for childtest2 in child_textregion:
                                    if childtest2.tag.endswith("TextEquiv"):
                                        is_textline_text = True

                                if not is_textline_text:
                                    text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
                                    unicode_textline = ET.SubElement(text_subelement, 'Unicode')
                                    unicode_textline.text = extracted_texts_merged[indexer]
                                else:
                                    for childtest3 in child_textregion:
                                        if childtest3.tag.endswith("TextEquiv"):
                                            for child_uc in childtest3:
                                                if child_uc.tag.endswith("Unicode"):
                                                    child_uc.text = extracted_texts_merged[indexer]

                                indexer = indexer + 1
                                has_textline = True
                        if has_textline:
                            if is_textregion_text:
                                for child4 in nn:
                                    if child4.tag.endswith("TextEquiv"):
                                        for childtr_uc in child4:
                                            if childtr_uc.tag.endswith("Unicode"):
                                                childtr_uc.text = text_by_textregion[indexer_textregion]
                            else:
                                unicode_textregion.text = text_by_textregion[indexer_textregion]
                            indexer_textregion = indexer_textregion + 1

                    ET.register_namespace("", name_space)
                    tree1.write(out_file_ocr, xml_declaration=True, method='xml', encoding="utf8", default_namespace=None)
            #print("Job done in %.1fs", time.time() - t0)
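# Hypothetical usage sketch (directory names are placeholders, not part of this
# module):
#
#     ocr = Eynollah_ocr(dir_models="models", dir_in="images",
#                        dir_xmls="layout_xml", dir_out="ocr_xml")
#     ocr.run()
#
# This reads every image in dir_in, loads the PAGE-XML layout with the same
# file stem from dir_xmls, and writes a copy enriched with TextEquiv text to
# dir_out.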