# pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring,missing-class-docstring,too-many-branches
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
# pylint: disable=too-many-public-methods,too-many-arguments,too-many-instance-attributes
# pylint: disable=consider-using-enumerate
"""
document layout analysis (segmentation) with output in PAGE-XML
"""

from logging import Logger
from difflib import SequenceMatcher as sq
from PIL import Image, ImageDraw, ImageFont
import math
import os
import sys
import time
from typing import Optional
import atexit
import warnings
from functools import partial
from pathlib import Path
from multiprocessing import cpu_count
import gc
import copy
import json

from loky import ProcessPoolExecutor
import xml.etree.ElementTree as ET
import cv2
import numpy as np
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d
from numba import cuda

from ocrd import OcrdPage
from ocrd_utils import getLogger, tf_disable_interactive_logs

try:
    import torch
except ImportError:
    torch = None
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None
try:
    from transformers import TrOCRProcessor, VisionEncoderDecoderModel
except ImportError:
    TrOCRProcessor = VisionEncoderDecoderModel = None

#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
tf_disable_interactive_logs()
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.keras.models import load_model
tf.get_logger().setLevel("ERROR")
warnings.filterwarnings("ignore")
# use tf1 compatibility for keras backend
from tensorflow.compat.v1.keras.backend import set_session
from tensorflow.keras import layers
from tensorflow.keras.layers import StringLookup

from .utils.contour import (
    filter_contours_area_of_image,
    filter_contours_area_of_image_tables,
    find_contours_mean_y_diff,
    find_new_features_of_contours,
    find_features_of_contours,
    get_text_region_boxes_by_given_contours,
    get_textregion_contours_in_org_image,
    get_textregion_contours_in_org_image_light,
    return_contours_of_image,
    return_contours_of_interested_region,
    return_contours_of_interested_region_by_min_size,
    return_contours_of_interested_textline,
    return_parent_contours,
)
from .utils.rotate import (
    rotate_image,
    rotation_not_90_func,
    rotation_not_90_func_full_layout
)
from .utils.separate_lines import (
    textline_contours_postprocessing,
    separate_lines_new2,
    return_deskew_slop,
    do_work_of_slopes_new,
    do_work_of_slopes_new_curved,
    do_work_of_slopes_new_light,
)
from .utils.drop_capitals import (
    adhere_drop_capital_region_into_corresponding_textline,
    filter_small_drop_capitals_from_no_patch_layout
)
from .utils.marginals import get_marginals
from .utils.resize import resize_image
from .utils import (
    boosting_headers_by_longshot_region_segmentation,
    crop_image_inside_box,
    find_num_col,
    otsu_copy_binary,
    put_drop_out_from_only_drop_model,
    putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
    check_any_text_region_in_model_one_is_main_or_header,
    check_any_text_region_in_model_one_is_main_or_header_light,
    small_textlines_to_parent_adherence2,
    order_of_regions,
    find_number_of_columns_in_document,
    return_boxes_of_images_by_order_of_reading_new
)
from .utils.pil_cv2 import check_dpi, pil2cv
from .utils.xml import order_and_id_of_texts
from .plot import EynollahPlotter
from .writer import EynollahXmlWriter

MIN_AREA_REGION = 0.000001
SLOPE_THRESHOLD = 0.13
RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45:
DPI_THRESHOLD = 298
MAX_SLOPE = 999
KERNEL = np.ones((5, 5), np.uint8)

projection_dim = 64
patch_size = 1
num_patches = 21 * 21  #14*14 #28*28
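
# These module-level values appear to be the ViT-style hyperparameters for the
# transformer layers below (presumably the machine-based reading-order model):
# num_patches = 21*21 implies a 21x21 patch grid, and with patch_size = 1 each
# "patch" is a single pixel of that grid, projected to a 64-dim embedding.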


class Patches(layers.Layer):
    def __init__(self, **kwargs):
        super(Patches, self).__init__()
        self.patch_size = patch_size

    def call(self, images):
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'patch_size': self.patch_size,
        })
        return config
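
# Shape sketch for Patches.call (illustrative, not executed): for input
# (B, H, W, C) and patch size P, tf.image.extract_patches yields
# (B, H//P, W//P, P*P*C), which the reshape flattens to (B, H*W/(P*P), P*P*C).
# With P = 1 this is simply (B, H*W, C) -- one "patch" per pixel.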


class PatchEncoder(layers.Layer):
    def __init__(self, **kwargs):
        super(PatchEncoder, self).__init__()
        self.num_patches = num_patches
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'num_patches': self.num_patches,
            'projection': self.projection,
            'position_embedding': self.position_embedding,
        })
        return config
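
# Minimal usage sketch for the two custom layers (hedged: shapes assume the
# module defaults above, num_patches = 441 and projection_dim = 64; kept as
# comments so nothing runs at import time):
#
#     feature_map = tf.random.uniform((1, 21, 21, 3))  # hypothetical input
#     patches = Patches()(feature_map)                 # -> (1, 441, 3)
#     encoded = PatchEncoder()(patches)                # -> (1, 441, 64)
#
# Presumably these classes are passed as custom_objects when the saved
# reading-order model is deserialized with tf.keras.models.load_model.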


class Eynollah:
    def __init__(
        self,
        dir_models : str,
        dir_out : Optional[str] = None,
        dir_of_cropped_images : Optional[str] = None,
        extract_only_images : bool = False,
        dir_of_layout : Optional[str] = None,
        dir_of_deskewed : Optional[str] = None,
        dir_of_all : Optional[str] = None,
        dir_save_page : Optional[str] = None,
        enable_plotting : bool = False,
        allow_enhancement : bool = False,
        curved_line : bool = False,
        textline_light : bool = False,
        full_layout : bool = False,
        tables : bool = False,
        right2left : bool = False,
        input_binary : bool = False,
        allow_scaling : bool = False,
        headers_off : bool = False,
        light_version : bool = False,
        ignore_page_extraction : bool = False,
        reading_order_machine_based : bool = False,
        do_ocr : bool = False,
        num_col_upper : Optional[int] = None,
        num_col_lower : Optional[int] = None,
        skip_layout_and_reading_order : bool = False,
        logger : Optional[Logger] = None,
    ):
        if skip_layout_and_reading_order:
            textline_light = True
        self.light_version = light_version
        self.dir_out = dir_out
        self.dir_of_all = dir_of_all
        self.dir_save_page = dir_save_page
        self.reading_order_machine_based = reading_order_machine_based
        self.dir_of_deskewed = dir_of_deskewed
        self.dir_of_cropped_images = dir_of_cropped_images
        self.dir_of_layout = dir_of_layout
        self.enable_plotting = enable_plotting
        self.allow_enhancement = allow_enhancement
        self.curved_line = curved_line
        self.textline_light = textline_light
        self.full_layout = full_layout
        self.tables = tables
        self.right2left = right2left
        self.input_binary = input_binary
        self.allow_scaling = allow_scaling
        self.headers_off = headers_off
        self.extract_only_images = extract_only_images
        self.ignore_page_extraction = ignore_page_extraction
        self.skip_layout_and_reading_order = skip_layout_and_reading_order
        self.ocr = do_ocr
        if num_col_upper:
            self.num_col_upper = int(num_col_upper)
        else:
            self.num_col_upper = num_col_upper
        if num_col_lower:
            self.num_col_lower = int(num_col_lower)
        else:
            self.num_col_lower = num_col_lower
        self.logger = logger if logger else getLogger('eynollah')
        # for parallelization of CPU-intensive tasks:
        self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200)
        atexit.register(self.executor.shutdown)
        self.dir_models = dir_models
        self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425"
        self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425"
        self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425"
        self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425"
        self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425"
        # earlier candidates for the fully-layout non-patch model:
        #"/modelens_full_lay_1_3_031124"
        #"/modelens_full_lay_13__3_19_241024"
        #"/model_full_lay_13_241024"
        #"/modelens_full_lay_13_17_231024"
        #"/modelens_full_lay_1_2_221024"
        #"/eynollah-full-regions-1column_20210425"
        self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_1__4_3_091124"
        #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425"
        self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425"
        self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425"
        self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314"
        self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18"
        self.model_reading_order_dir = dir_models + "/model_mb_ro_aug_2"  #"/model_ens_reading_order_machine_based"
        # earlier candidates for the 1/2-column early-layout model:
        #"/modelens_12sp_elay_0_3_4__3_6_n"
        #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"
        #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"
        #"/modelens_1_2_4_5_early_lay_1_2_spaltige"
        #"/model_3_eraly_layout_no_patches_1_2_spaltige"
        self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"
        ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans"
        # earlier candidates for the fully-layout patch model:
        #"/modelens_full_lay_1_3_031124"
        #"/modelens_full_lay_13__3_19_241024"
        #"/model_full_lay_13_241024"
        #"/modelens_full_lay_13_17_231024"
        #"/modelens_full_lay_1_2_221024"
        #"/modelens_full_layout_24_till_28"
        #"/model_2_full_layout_new_trans"
        self.model_region_dir_fully = dir_models + "/modelens_full_lay_1__4_3_091124"
        if self.textline_light:
            # earlier candidates for the light textline model:
            #"/modelens_textline_1_4_16092024"
            #"/model_textline_ens_3_4_5_6_artificial"
            #"/modelens_textline_1_3_4_20240915"
            #"/modelens_textline_9_12_13_14_15"
            #"/eynollah-textline_light_20210425"
            self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"
        else:
            #"/eynollah-textline_20210425"
            self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"
        if self.ocr:
            self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
        if self.tables:
            if self.light_version:
                self.model_table_dir = dir_models + "/modelens_table_0t4_201124"
            else:
                self.model_table_dir = dir_models + "/eynollah-tables_20210319"

        # #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
        # #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True)
        # #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
        # config = tf.compat.v1.ConfigProto()
        # config.gpu_options.allow_growth = True
        # #session = tf.InteractiveSession()
        # session = tf.compat.v1.Session(config=config)
        # set_session(session)
        try:
            for device in tf.config.list_physical_devices('GPU'):
                tf.config.experimental.set_memory_growth(device, True)
        except Exception:
            self.logger.warning("no GPU device available")

        self.model_page = self.our_load_model(self.model_page_dir)
        self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
        self.model_bin = self.our_load_model(self.model_dir_of_binarization)
        if self.extract_only_images:
            self.model_region = self.our_load_model(self.model_region_dir_p_ens_light_only_images_extraction)
        else:
            self.model_textline = self.our_load_model(self.model_textline_dir)
            if self.light_version:
                self.model_region = self.our_load_model(self.model_region_dir_p_ens_light)
                self.model_region_1_2 = self.our_load_model(self.model_region_dir_p_1_2_sp_np)
            else:
                self.model_region = self.our_load_model(self.model_region_dir_p_ens)
                self.model_region_p2 = self.our_load_model(self.model_region_dir_p2)
                self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement)
            ###self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new)
            self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np)
            self.model_region_fl = self.our_load_model(self.model_region_dir_fully)
            if self.reading_order_machine_based:
                self.model_reading_order = self.our_load_model(self.model_reading_order_dir)
            if self.ocr:
                self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
                self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
                #("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten")
                self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
            if self.tables:
                self.model_table = self.our_load_model(self.model_table_dir)
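
    # Constructor usage sketch (hedged: assumes the models directory contains
    # the *_dir subfolders resolved above; commented out, not executed):
    #
    #     eynollah = Eynollah("/path/to/models", dir_out="/tmp/out",
    #                         light_version=True, textline_light=True)
    #     eynollah.reset_file_name_dir("page.png")
    #
    # All networks are loaded once in __init__, so a single instance can
    # process many images without reloading models.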

    def cache_images(self, image_filename=None, image_pil=None, dpi=None):
        ret = {}
        t_c0 = time.time()
        if image_filename:
            ret['img'] = cv2.imread(image_filename)
            if self.light_version:
                self.dpi = 100
            else:
                self.dpi = check_dpi(image_filename)
        else:
            ret['img'] = pil2cv(image_pil)
            if self.light_version:
                self.dpi = 100
            else:
                self.dpi = check_dpi(image_pil)
        ret['img_grayscale'] = cv2.cvtColor(ret['img'], cv2.COLOR_BGR2GRAY)
        for prefix in ('', '_grayscale'):
            ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8)
        self._imgs = ret
        if dpi is not None:
            self.dpi = dpi

    def reset_file_name_dir(self, image_filename):
        t_c = time.time()
        self.cache_images(image_filename=image_filename)

        self.plotter = None if not self.enable_plotting else EynollahPlotter(
            dir_out=self.dir_out,
            dir_of_all=self.dir_of_all,
            dir_save_page=self.dir_save_page,
            dir_of_deskewed=self.dir_of_deskewed,
            dir_of_cropped_images=self.dir_of_cropped_images,
            dir_of_layout=self.dir_of_layout,
            image_filename_stem=Path(Path(image_filename).name).stem)

        self.writer = EynollahXmlWriter(
            dir_out=self.dir_out,
            image_filename=image_filename,
            curved_line=self.curved_line,
            textline_light=self.textline_light)

    def imread(self, grayscale=False, uint8=True):
        key = 'img'
        if grayscale:
            key += '_grayscale'
        if uint8:
            key += '_uint8'
        return self._imgs[key].copy()

    def isNaN(self, num):
        # NaN is the only value that is not equal to itself
        return num != num

    def predict_enhancement(self, img):
        self.logger.debug("enter predict_enhancement")

        img_height_model = self.model_enhancement.layers[-1].output_shape[1]
        img_width_model = self.model_enhancement.layers[-1].output_shape[2]
        # cv2.resize expects (width, height); pad the image up to the model's
        # input size in whichever dimension falls short
        if img.shape[0] < img_height_model:
            img = cv2.resize(img, (img.shape[1], img_height_model), interpolation=cv2.INTER_NEAREST)
        if img.shape[1] < img_width_model:
            img = cv2.resize(img, (img_width_model, img.shape[0]), interpolation=cv2.INTER_NEAREST)
        margin = int(0 * img_width_model)  # no overlap between enhancement tiles
        width_mid = img_width_model - 2 * margin
        height_mid = img_height_model - 2 * margin
        img = img / 255.
        img_h = img.shape[0]
        img_w = img.shape[1]

        prediction_true = np.zeros((img_h, img_w, 3))
        nxf = img_w / float(width_mid)
        nyf = img_h / float(height_mid)
        nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf)
        nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf)

        for i in range(nxf):
            for j in range(nyf):
                index_x_d = i * width_mid
                index_x_u = index_x_d + img_width_model
                index_y_d = j * height_mid
                index_y_u = index_y_d + img_height_model

                if index_x_u > img_w:
                    index_x_u = img_w
                    index_x_d = img_w - img_width_model
                if index_y_u > img_h:
                    index_y_u = img_h
                    index_y_d = img_h - img_height_model

                img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :]
                label_p_pred = self.model_enhancement.predict(img_patch, verbose=0)
                seg = label_p_pred[0, :, :, :] * 255

                if i == 0 and j == 0:
                    prediction_true[index_y_d + 0:index_y_u - margin,
                                    index_x_d + 0:index_x_u - margin] = \
                        seg[0:-margin or None,
                            0:-margin or None]
                elif i == nxf - 1 and j == nyf - 1:
                    prediction_true[index_y_d + margin:index_y_u - 0,
                                    index_x_d + margin:index_x_u - 0] = \
                        seg[margin:,
                            margin:]
                elif i == 0 and j == nyf - 1:
                    prediction_true[index_y_d + margin:index_y_u - 0,
                                    index_x_d + 0:index_x_u - margin] = \
                        seg[margin:,
                            0:-margin or None]
                elif i == nxf - 1 and j == 0:
                    prediction_true[index_y_d + 0:index_y_u - margin,
                                    index_x_d + margin:index_x_u - 0] = \
                        seg[0:-margin or None,
                            margin:]
                elif i == 0 and j != 0 and j != nyf - 1:
                    prediction_true[index_y_d + margin:index_y_u - margin,
                                    index_x_d + 0:index_x_u - margin] = \
                        seg[margin:-margin or None,
                            0:-margin or None]
                elif i == nxf - 1 and j != 0 and j != nyf - 1:
                    prediction_true[index_y_d + margin:index_y_u - margin,
                                    index_x_d + margin:index_x_u - 0] = \
                        seg[margin:-margin or None,
                            margin:]
                elif i != 0 and i != nxf - 1 and j == 0:
                    prediction_true[index_y_d + 0:index_y_u - margin,
                                    index_x_d + margin:index_x_u - margin] = \
                        seg[0:-margin or None,
                            margin:-margin or None]
                elif i != 0 and i != nxf - 1 and j == nyf - 1:
                    prediction_true[index_y_d + margin:index_y_u - 0,
                                    index_x_d + margin:index_x_u - margin] = \
                        seg[margin:,
                            margin:-margin or None]
                else:
                    prediction_true[index_y_d + margin:index_y_u - margin,
                                    index_x_d + margin:index_x_u - margin] = \
                        seg[margin:-margin or None,
                            margin:-margin or None]

        prediction_true = prediction_true.astype(int)
        return prediction_true
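
    # Tiling arithmetic above, worked through: with margin == 0 we have
    # width_mid == img_width_model, so nxf = ceil(img_w / img_width_model).
    # E.g. img_w = 1000 with a 448-wide model gives nxf = 3; the last tile
    # would overrun, so it is shifted back to index_x_d = 1000 - 448 = 552
    # and simply overwrites part of its neighbour.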

    def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred):
        self.logger.debug("enter calculate_width_height_by_columns")
        if num_col == 1 and width_early < 1100:
            img_w_new = 2000
        elif num_col == 1 and width_early >= 2500:
            img_w_new = 2000
        elif num_col == 1 and width_early >= 1100 and width_early < 2500:
            img_w_new = width_early
        elif num_col == 2 and width_early < 2000:
            img_w_new = 2400
        elif num_col == 2 and width_early >= 3500:
            img_w_new = 2400
        elif num_col == 2 and width_early >= 2000 and width_early < 3500:
            img_w_new = width_early
        elif num_col == 3 and width_early < 2000:
            img_w_new = 3000
        elif num_col == 3 and width_early >= 4000:
            img_w_new = 3000
        elif num_col == 3 and width_early >= 2000 and width_early < 4000:
            img_w_new = width_early
        elif num_col == 4 and width_early < 2500:
            img_w_new = 4000
        elif num_col == 4 and width_early >= 5000:
            img_w_new = 4000
        elif num_col == 4 and width_early >= 2500 and width_early < 5000:
            img_w_new = width_early
        elif num_col == 5 and width_early < 3700:
            img_w_new = 5000
        elif num_col == 5 and width_early >= 7000:
            img_w_new = 5000
        elif num_col == 5 and width_early >= 3700 and width_early < 7000:
            img_w_new = width_early
        elif num_col == 6 and width_early < 4500:
            img_w_new = 6500  # 5400
        else:
            img_w_new = width_early
        img_h_new = img_w_new * img.shape[0] // img.shape[1]

        if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early:
            img_new = np.copy(img)
            num_column_is_classified = False
        #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000:
        elif img_h_new >= 8000:
            img_new = np.copy(img)
            num_column_is_classified = False
        else:
            img_new = resize_image(img, img_h_new, img_w_new)
            num_column_is_classified = True

        return img_new, num_column_is_classified
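
    # Equivalent lookup for the branch ladder above (a sketch, not a drop-in
    # replacement -- kept as comments to preserve the original control flow):
    #
    #     # num_col: (lower width bound, upper width bound, target width)
    #     WIDTH_BY_COLUMNS = {1: (1100, 2500, 2000), 2: (2000, 3500, 2400),
    #                         3: (2000, 4000, 3000), 4: (2500, 5000, 4000),
    #                         5: (3700, 7000, 5000), 6: (4500, None, 6500)}
    #
    # Widths inside [lower, upper) are kept as-is; anything outside is
    # rescaled to the target width for that column count.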

    def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred):
        self.logger.debug("enter calculate_width_height_by_columns_1_2")
        if num_col == 1:
            img_w_new = 1000
        else:
            img_w_new = 1300
        img_h_new = img_w_new * img.shape[0] // img.shape[1]

        if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early:
            img_new = np.copy(img)
            num_column_is_classified = False
        #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000:
        elif img_h_new >= 8000:
            img_new = np.copy(img)
            num_column_is_classified = False
        else:
            img_new = resize_image(img, img_h_new, img_w_new)
            num_column_is_classified = True

        return img_new, num_column_is_classified

    def calculate_width_height_by_columns_extract_only_images(self, img, num_col, width_early, label_p_pred):
        self.logger.debug("enter calculate_width_height_by_columns_extract_only_images")
        if num_col == 1:
            img_w_new = 700
        elif num_col == 2:
            img_w_new = 900
        elif num_col == 3:
            img_w_new = 1500
        elif num_col == 4:
            img_w_new = 1800
        elif num_col == 5:
            img_w_new = 2200
        else:
            # num_col == 6; an else branch keeps img_w_new defined even if
            # the classifier ever reports an unexpected column count
            img_w_new = 2500
        img_h_new = img_w_new * img.shape[0] // img.shape[1]

        img_new = resize_image(img, img_h_new, img_w_new)
        num_column_is_classified = True

        return img_new, num_column_is_classified

    def resize_image_with_column_classifier(self, is_image_enhanced, img_bin):
        self.logger.debug("enter resize_image_with_column_classifier")
        if self.input_binary:
            img = np.copy(img_bin)
        else:
            img = self.imread()

        _, page_coord = self.early_page_for_num_of_column_classification(img)

        if self.input_binary:
            img_in = np.copy(img)
            img_in = img_in / 255.0
            width_early = img_in.shape[1]
            img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST)
            img_in = img_in.reshape(1, 448, 448, 3)
        else:
            img_1ch = self.imread(grayscale=True, uint8=False)
            width_early = img_1ch.shape[1]
            img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]

            # plt.imshow(img_1ch)
            # plt.show()
            img_1ch = img_1ch / 255.0
            img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST)

            img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
            img_in[0, :, :, 0] = img_1ch[:, :]
            img_in[0, :, :, 1] = img_1ch[:, :]
            img_in[0, :, :, 2] = img_1ch[:, :]

        label_p_pred = self.model_classifier.predict(img_in, verbose=0)
        num_col = np.argmax(label_p_pred[0]) + 1

        self.logger.info("Found %s columns (%s)", num_col, label_p_pred)
        img_new, _ = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred)

        if img_new.shape[1] > img.shape[1]:
            img_new = self.predict_enhancement(img_new)
            is_image_enhanced = True

        return img, img_new, is_image_enhanced

    def resize_and_enhance_image_with_column_classifier(self, light_version):
        self.logger.debug("enter resize_and_enhance_image_with_column_classifier")
        dpi = self.dpi
        self.logger.info("Detected %s DPI", dpi)
        if self.input_binary:
            img = self.imread()
            prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5)
            prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
            prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8)
            img = np.copy(prediction_bin)
            img_bin = prediction_bin
        else:
            img = self.imread()
            img_bin = None

        width_early = img.shape[1]
        t1 = time.time()
        _, page_coord = self.early_page_for_num_of_column_classification(img_bin)

        self.image_page_org_size = img[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3], :]
        self.page_coord = page_coord

        if self.num_col_upper and not self.num_col_lower:
            num_col = self.num_col_upper
            label_p_pred = [np.ones(6)]
        elif self.num_col_lower and not self.num_col_upper:
            num_col = self.num_col_lower
            label_p_pred = [np.ones(6)]
        elif not self.num_col_upper and not self.num_col_lower:
            if self.input_binary:
                img_in = np.copy(img)
                img_in = img_in / 255.0
                img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST)
                img_in = img_in.reshape(1, 448, 448, 3)
            else:
                img_1ch = self.imread(grayscale=True)
                width_early = img_1ch.shape[1]
                img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]

                img_1ch = img_1ch / 255.0
                img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST)
                img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
                img_in[0, :, :, 0] = img_1ch[:, :]
                img_in[0, :, :, 1] = img_1ch[:, :]
                img_in[0, :, :, 2] = img_1ch[:, :]

            label_p_pred = self.model_classifier.predict(img_in, verbose=0)
            num_col = np.argmax(label_p_pred[0]) + 1
        elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper != self.num_col_lower):
            if self.input_binary:
                img_in = np.copy(img)
                img_in = img_in / 255.0
                img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST)
                img_in = img_in.reshape(1, 448, 448, 3)
            else:
                img_1ch = self.imread(grayscale=True)
                width_early = img_1ch.shape[1]
                img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]

                img_1ch = img_1ch / 255.0
                img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST)
                img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
                img_in[0, :, :, 0] = img_1ch[:, :]
                img_in[0, :, :, 1] = img_1ch[:, :]
                img_in[0, :, :, 2] = img_1ch[:, :]

            label_p_pred = self.model_classifier.predict(img_in, verbose=0)
            num_col = np.argmax(label_p_pred[0]) + 1

            # clamp the classifier result into [num_col_lower, num_col_upper]
            if num_col > self.num_col_upper:
                num_col = self.num_col_upper
                label_p_pred = [np.ones(6)]
            if num_col < self.num_col_lower:
                num_col = self.num_col_lower
                label_p_pred = [np.ones(6)]
        else:
            num_col = self.num_col_upper
            label_p_pred = [np.ones(6)]

        self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5))
        if not self.extract_only_images:
            if dpi < DPI_THRESHOLD:
                if light_version and num_col in (1, 2):
                    img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(
                        img, num_col, width_early, label_p_pred)
                else:
                    img_new, num_column_is_classified = self.calculate_width_height_by_columns(
                        img, num_col, width_early, label_p_pred)
                if light_version:
                    image_res = np.copy(img_new)
                else:
                    image_res = self.predict_enhancement(img_new)
                is_image_enhanced = True
            else:
                if light_version and num_col in (1, 2):
                    img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(
                        img, num_col, width_early, label_p_pred)
                    image_res = np.copy(img_new)
                    is_image_enhanced = True
                else:
                    num_column_is_classified = True
                    image_res = np.copy(img)
                    is_image_enhanced = False
        else:
            num_column_is_classified = True
            image_res = np.copy(img)
            is_image_enhanced = False

        self.logger.debug("exit resize_and_enhance_image_with_column_classifier")
        return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin

    # pylint: disable=attribute-defined-outside-init
    def get_image_and_scales(self, img_org, img_res, scale):
        self.logger.debug("enter get_image_and_scales")
        self.image = np.copy(img_res)
        self.image_org = np.copy(img_org)
        self.height_org = self.image.shape[0]
        self.width_org = self.image.shape[1]

        self.img_hight_int = int(self.image.shape[0] * scale)
        self.img_width_int = int(self.image.shape[1] * scale)
        self.scale_y = self.img_hight_int / float(self.image.shape[0])
        self.scale_x = self.img_width_int / float(self.image.shape[1])

        self.image = resize_image(self.image, self.img_hight_int, self.img_width_int)

        # Also set for the plotter
        if self.plotter:
            self.plotter.image_org = self.image_org
            self.plotter.scale_y = self.scale_y
            self.plotter.scale_x = self.scale_x
        # Also set for the writer
        self.writer.image_org = self.image_org
        self.writer.scale_y = self.scale_y
        self.writer.scale_x = self.scale_x
        self.writer.height_org = self.height_org
        self.writer.width_org = self.width_org

    def get_image_and_scales_after_enhancing(self, img_org, img_res):
        self.logger.debug("enter get_image_and_scales_after_enhancing")
        self.image = np.copy(img_res)
        self.image = self.image.astype(np.uint8)
        self.image_org = np.copy(img_org)
        self.height_org = self.image_org.shape[0]
        self.width_org = self.image_org.shape[1]

        self.scale_y = img_res.shape[0] / float(self.image_org.shape[0])
        self.scale_x = img_res.shape[1] / float(self.image_org.shape[1])

        # Also set for the plotter
        if self.plotter:
            self.plotter.image_org = self.image_org
            self.plotter.scale_y = self.scale_y
            self.plotter.scale_x = self.scale_x
        # Also set for the writer
        self.writer.image_org = self.image_org
        self.writer.scale_y = self.scale_y
        self.writer.scale_x = self.scale_x
        self.writer.height_org = self.height_org
        self.writer.width_org = self.width_org

    def do_prediction(
            self, patches, img, model,
            n_batch_inference=1, marginal_of_patch_percent=0.1,
            thresholding_for_some_classes_in_light_version=False,
            thresholding_for_artificial_class_in_light_version=False):

        self.logger.debug("enter do_prediction")
        img_height_model = model.layers[-1].output_shape[1]
        img_width_model = model.layers[-1].output_shape[2]

        if not patches:
            img_h_page = img.shape[0]
            img_w_page = img.shape[1]
            img = img / 255.0
            img = resize_image(img, img_height_model, img_width_model)

            label_p_pred = model.predict(img[np.newaxis], verbose=0)
            seg = np.argmax(label_p_pred, axis=3)[0]

            if thresholding_for_artificial_class_in_light_version:
                seg_art = label_p_pred[0,:,:,2]

                seg_art[seg_art<0.2] = 0
                seg_art[seg_art>0] = 1

                seg[seg_art==1] = 2
            seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
            prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8)
            return prediction_true

        if img.shape[0] < img_height_model:
            img = resize_image(img, img_height_model, img.shape[1])
        if img.shape[1] < img_width_model:
            img = resize_image(img, img.shape[0], img_width_model)

        self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model)
        margin = int(marginal_of_patch_percent * img_height_model)
        width_mid = img_width_model - 2 * margin
        height_mid = img_height_model - 2 * margin
        img = img / 255.
        #img = img.astype(np.float16)
        img_h = img.shape[0]
        img_w = img.shape[1]
        prediction_true = np.zeros((img_h, img_w, 3))
        mask_true = np.zeros((img_h, img_w))
        nxf = img_w / float(width_mid)
        nyf = img_h / float(height_mid)
        nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf)
        nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf)

        list_i_s = []
        list_j_s = []
        list_x_u = []
        list_x_d = []
        list_y_u = []
        list_y_d = []

        batch_indexer = 0
        img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))
        for i in range(nxf):
            for j in range(nyf):
                index_x_d = i * width_mid
                index_x_u = index_x_d + img_width_model
                index_y_d = j * height_mid
                index_y_u = index_y_d + img_height_model
                if index_x_u > img_w:
                    index_x_u = img_w
                    index_x_d = img_w - img_width_model
                if index_y_u > img_h:
                    index_y_u = img_h
                    index_y_d = img_h - img_height_model

                list_i_s.append(i)
                list_j_s.append(j)
                list_x_u.append(index_x_u)
                list_x_d.append(index_x_d)
                list_y_d.append(index_y_d)
                list_y_u.append(index_y_u)

                img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
                batch_indexer += 1

                if (batch_indexer == n_batch_inference or
                        # last batch
                        (i == nxf - 1 and j == nyf - 1)):
                    self.logger.debug("predicting patches on %s", str(img_patch.shape))
                    label_p_pred = model.predict(img_patch, verbose=0)
                    seg = np.argmax(label_p_pred, axis=3)

                    if thresholding_for_some_classes_in_light_version:
                        seg_not_base = label_p_pred[:,:,:,4]
                        seg_not_base[seg_not_base>0.03] = 1
                        seg_not_base[seg_not_base<1] = 0

                        seg_line = label_p_pred[:,:,:,3]
                        seg_line[seg_line>0.1] = 1
                        seg_line[seg_line<1] = 0

                        seg_background = label_p_pred[:,:,:,0]
                        seg_background[seg_background>0.25] = 1
                        seg_background[seg_background<1] = 0

                        seg[seg_not_base==1] = 4
                        seg[seg_background==1] = 0
                        seg[(seg_line==1) & (seg==0)] = 3
                    if thresholding_for_artificial_class_in_light_version:
                        seg_art = label_p_pred[:,:,:,2]

                        seg_art[seg_art<0.2] = 0
                        seg_art[seg_art>0] = 1

                        seg[seg_art==1] = 2

                    indexer_inside_batch = 0
                    for i_batch, j_batch in zip(list_i_s, list_j_s):
                        seg_in = seg[indexer_inside_batch]

                        index_y_u_in = list_y_u[indexer_inside_batch]
                        index_y_d_in = list_y_d[indexer_inside_batch]

                        index_x_u_in = list_x_u[indexer_inside_batch]
                        index_x_d_in = list_x_d[indexer_inside_batch]

                        if i_batch == 0 and j_batch == 0:
                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                            index_x_d_in + 0:index_x_u_in - margin] = \
                                seg_in[0:-margin or None,
                                       0:-margin or None,
                                       np.newaxis]
                        elif i_batch == nxf - 1 and j_batch == nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                            index_x_d_in + margin:index_x_u_in - 0] = \
                                seg_in[margin:,
                                       margin:,
                                       np.newaxis]
                        elif i_batch == 0 and j_batch == nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                            index_x_d_in + 0:index_x_u_in - margin] = \
                                seg_in[margin:,
                                       0:-margin or None,
                                       np.newaxis]
                        elif i_batch == nxf - 1 and j_batch == 0:
                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - 0] = \
                                seg_in[0:-margin or None,
                                       margin:,
                                       np.newaxis]
                        elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                            index_x_d_in + 0:index_x_u_in - margin] = \
                                seg_in[margin:-margin or None,
                                       0:-margin or None,
                                       np.newaxis]
                        elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - 0] = \
                                seg_in[margin:-margin or None,
                                       margin:,
                                       np.newaxis]
                        elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0:
                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - margin] = \
                                seg_in[0:-margin or None,
                                       margin:-margin or None,
                                       np.newaxis]
                        elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                            index_x_d_in + margin:index_x_u_in - margin] = \
                                seg_in[margin:,
                                       margin:-margin or None,
                                       np.newaxis]
                        else:
                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - margin] = \
                                seg_in[margin:-margin or None,
                                       margin:-margin or None,
                                       np.newaxis]
                        indexer_inside_batch += 1

                    list_i_s = []
                    list_j_s = []
                    list_x_u = []
                    list_x_d = []
                    list_y_u = []
                    list_y_d = []

                    batch_indexer = 0
                    img_patch[:] = 0

        prediction_true = prediction_true.astype(np.uint8)
        #del model
        gc.collect()
        return prediction_true
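
    # Stitching rule used above, in short: every tile contributes only its
    # interior, i.e. the margin is stripped on each side that has a
    # neighbouring tile, while border tiles keep their outer margin so the
    # whole page stays covered. For an interior tile this reduces to:
    #
    #     prediction_true[y_d + margin:y_u - margin, x_d + margin:x_u - margin] = \
    #         seg_in[margin:-margin or None, margin:-margin or None, np.newaxis]
    #
    # where the "or None" guard keeps the slice valid when margin == 0.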

    def do_padding_with_scale(self, img, scale):
        h_n = int(img.shape[0] * scale)
        w_n = int(img.shape[1] * scale)

        channel0_avg = int(np.mean(img[:,:,0]))
        channel1_avg = int(np.mean(img[:,:,1]))
        channel2_avg = int(np.mean(img[:,:,2]))

        h_diff = img.shape[0] - h_n
        w_diff = img.shape[1] - w_n

        h_start = int(0.5 * h_diff)
        w_start = int(0.5 * w_diff)

        img_res = resize_image(img, h_n, w_n)
        #label_res = resize_image(label, h_n, w_n)

        img_scaled_padded = np.copy(img)
        #label_scaled_padded = np.zeros(label.shape)

        img_scaled_padded[:,:,0] = channel0_avg
        img_scaled_padded[:,:,1] = channel1_avg
        img_scaled_padded[:,:,2] = channel2_avg

        img_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n, :] = img_res[:,:,:]
        #label_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n, :] = label_res[:,:,:]

        return img_scaled_padded  #, label_scaled_padded
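
    # Worked example for do_padding_with_scale: a 1000x800 image at scale 0.5
    # is resized to 500x400 and pasted centred (h_start = 250, w_start = 200)
    # onto a canvas of the original size filled with the per-channel means,
    # so the page shrinks without changing the array shape seen downstream.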

    def do_prediction_new_concept_scatter_nd(
            self, patches, img, model,
            n_batch_inference=1, marginal_of_patch_percent=0.1,
            thresholding_for_some_classes_in_light_version=False,
            thresholding_for_artificial_class_in_light_version=False):

        self.logger.debug("enter do_prediction_new_concept_scatter_nd")
        img_height_model = model.layers[-1].output_shape[1]
        img_width_model = model.layers[-1].output_shape[2]

        if not patches:
            img_h_page = img.shape[0]
            img_w_page = img.shape[1]
            img = img / 255.0
            img = resize_image(img, img_height_model, img_width_model)

            label_p_pred = model.predict(img[np.newaxis], verbose=0)
            seg = np.argmax(label_p_pred, axis=3)[0]

            if thresholding_for_artificial_class_in_light_version:
                #seg_text = label_p_pred[0,:,:,1]
                #seg_text[seg_text<0.2] = 0
                #seg_text[seg_text>0] = 1
                #seg[seg_text==1] = 1

                seg_art = label_p_pred[0,:,:,4]
                seg_art[seg_art<0.2] = 0
                seg_art[seg_art>0] = 1
                seg[seg_art==1] = 4

            seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
            prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8)
            return prediction_true

        if img.shape[0] < img_height_model:
            img = resize_image(img, img_height_model, img.shape[1])
        if img.shape[1] < img_width_model:
            img = resize_image(img, img.shape[0], img_width_model)

        self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model)
        ##margin = int(marginal_of_patch_percent * img_height_model)
        #width_mid = img_width_model - 2 * margin
        #height_mid = img_height_model - 2 * margin
        img = img / 255.0
        img = img.astype(np.float16)
        img_h = img.shape[0]
        img_w = img.shape[1]

        stride_x = img_width_model - 100
        stride_y = img_height_model - 100

        one_tensor = tf.ones_like(img)
        img_patches, one_patches = tf.image.extract_patches(
            images=[img, one_tensor],
            sizes=[1, img_height_model, img_width_model, 1],
            strides=[1, stride_y, stride_x, 1],
            rates=[1, 1, 1, 1],
            padding='SAME')
        img_patches = tf.squeeze(img_patches)
        one_patches = tf.squeeze(one_patches)
        img_patches_resh = tf.reshape(img_patches, shape=(img_patches.shape[0] * img_patches.shape[1],
                                                          img_height_model, img_width_model, 3))
        pred_patches = model.predict(img_patches_resh, batch_size=n_batch_inference)
        one_patches = tf.reshape(one_patches, shape=(img_patches.shape[0] * img_patches.shape[1],
                                                     img_height_model, img_width_model, 3))
        x = tf.range(img.shape[1])
        y = tf.range(img.shape[0])
        x, y = tf.meshgrid(x, y)
        indices = tf.stack([y, x], axis=-1)

        indices_patches = tf.image.extract_patches(
            images=tf.expand_dims(indices, axis=0),
            sizes=[1, img_height_model, img_width_model, 1],
            strides=[1, stride_y, stride_x, 1],
            rates=[1, 1, 1, 1],
            padding='SAME')
        indices_patches = tf.squeeze(indices_patches)
        indices_patches = tf.reshape(indices_patches, shape=(img_patches.shape[0] * img_patches.shape[1],
                                                             img_height_model, img_width_model, 2))
        margin_y = int(0.5 * (img_height_model - stride_y))
        margin_x = int(0.5 * (img_width_model - stride_x))

        mask_margin = np.zeros((img_height_model, img_width_model))
        mask_margin[margin_y:img_height_model - margin_y,
                    margin_x:img_width_model - margin_x] = 1

        indices_patches_array = indices_patches.numpy()
        for i in range(indices_patches_array.shape[0]):
            indices_patches_array[i,:,:,0] = indices_patches_array[i,:,:,0] * mask_margin
            indices_patches_array[i,:,:,1] = indices_patches_array[i,:,:,1] * mask_margin

        reconstructed = tf.scatter_nd(
            indices=indices_patches_array,
            updates=pred_patches,
            shape=(img.shape[0], img.shape[1], pred_patches.shape[-1])).numpy()

        prediction_true = np.argmax(reconstructed, axis=2).astype(np.uint8)
        gc.collect()
        return np.repeat(prediction_true[:, :, np.newaxis], 3, axis=2)
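
    # Design note (as read from the code above): instead of the Python
    # stitching loop in do_prediction, this variant reconstructs the page with
    # a single tf.scatter_nd over per-pixel (y, x) indices. mask_margin zeroes
    # the indices of marginal pixels, so all their updates land on position
    # (0, 0); since scatter_nd sums duplicate indices, that one corner pixel
    # accumulates the discarded margins while interior pixels each receive
    # their own tile's prediction.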

    def do_prediction_new_concept(
            self, patches, img, model,
            n_batch_inference=1, marginal_of_patch_percent=0.1,
            thresholding_for_some_classes_in_light_version=False,
            thresholding_for_artificial_class_in_light_version=False):

        self.logger.debug("enter do_prediction_new_concept")
        img_height_model = model.layers[-1].output_shape[1]
        img_width_model = model.layers[-1].output_shape[2]

        if not patches:
            img_h_page = img.shape[0]
            img_w_page = img.shape[1]
            img = img / 255.0
            img = resize_image(img, img_height_model, img_width_model)

            label_p_pred = model.predict(img[np.newaxis], verbose=0)
            seg = np.argmax(label_p_pred, axis=3)[0]

            if thresholding_for_artificial_class_in_light_version:
                #seg_text = label_p_pred[0,:,:,1]
                #seg_text[seg_text<0.2] = 0
                #seg_text[seg_text>0] = 1
                #seg[seg_text==1] = 1

                seg_art = label_p_pred[0,:,:,4]
                seg_art[seg_art<0.2] = 0
                seg_art[seg_art>0] = 1
                seg[seg_art==1] = 4

            seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
            prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8)
            return prediction_true, resize_image(label_p_pred[0, :, :, 1], img_h_page, img_w_page)

        if img.shape[0] < img_height_model:
            img = resize_image(img, img_height_model, img.shape[1])
        if img.shape[1] < img_width_model:
            img = resize_image(img, img.shape[0], img_width_model)

        self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model)
        margin = int(marginal_of_patch_percent * img_height_model)
        width_mid = img_width_model - 2 * margin
        height_mid = img_height_model - 2 * margin
        img = img / 255.0
        img = img.astype(np.float16)
        img_h = img.shape[0]
        img_w = img.shape[1]
        prediction_true = np.zeros((img_h, img_w, 3))
        confidence_matrix = np.zeros((img_h, img_w))
        mask_true = np.zeros((img_h, img_w))
        nxf = img_w / float(width_mid)
        nyf = img_h / float(height_mid)
        nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf)
        nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf)

        list_i_s = []
        list_j_s = []
        list_x_u = []
        list_x_d = []
        list_y_u = []
        list_y_d = []

        batch_indexer = 0
        img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))
        for i in range(nxf):
            for j in range(nyf):
                index_x_d = i * width_mid
                index_x_u = index_x_d + img_width_model
                index_y_d = j * height_mid
                index_y_u = index_y_d + img_height_model
                if index_x_u > img_w:
                    index_x_u = img_w
                    index_x_d = img_w - img_width_model
                if index_y_u > img_h:
                    index_y_u = img_h
                    index_y_d = img_h - img_height_model

                list_i_s.append(i)
                list_j_s.append(j)
                list_x_u.append(index_x_u)
                list_x_d.append(index_x_d)
                list_y_d.append(index_y_d)
                list_y_u.append(index_y_u)

                img_patch[batch_indexer] = img[index_y_d:index_y_u, index_x_d:index_x_u]
                batch_indexer += 1

                if (batch_indexer == n_batch_inference or
                        # last batch
                        (i == nxf - 1 and j == nyf - 1)):
                    self.logger.debug("predicting patches on %s", str(img_patch.shape))
                    label_p_pred = model.predict(img_patch, verbose=0)
                    seg = np.argmax(label_p_pred, axis=3)

                    if thresholding_for_some_classes_in_light_version:
                        seg_art = label_p_pred[:,:,:,4]
                        seg_art[seg_art<0.2] = 0
                        seg_art[seg_art>0] = 1

                        seg_line = label_p_pred[:,:,:,3]
                        seg_line[seg_line>0.5] = 1  #seg_line[seg_line>0.1] = 1
                        seg_line[seg_line<1] = 0

                        seg[seg_art==1] = 4
                        seg[(seg_line==1) & (seg==0)] = 3
                    if thresholding_for_artificial_class_in_light_version:
                        seg_art = label_p_pred[:,:,:,2]

                        seg_art[seg_art<0.2] = 0
                        seg_art[seg_art>0] = 1

                        seg[seg_art==1] = 2

                    indexer_inside_batch = 0
                    for i_batch, j_batch in zip(list_i_s, list_j_s):
                        seg_in = seg[indexer_inside_batch]

                        index_y_u_in = list_y_u[indexer_inside_batch]
                        index_y_d_in = list_y_d[indexer_inside_batch]

                        index_x_u_in = list_x_u[indexer_inside_batch]
                        index_x_d_in = list_x_d[indexer_inside_batch]

                        # confidence is read per tile (indexing label_p_pred with
                        # indexer_inside_batch, not 0, so that batched inference
                        # does not reuse the first tile's probabilities)
                        if i_batch == 0 and j_batch == 0:
                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                            index_x_d_in + 0:index_x_u_in - margin] = \
                                seg_in[0:-margin or None,
                                       0:-margin or None,
                                       np.newaxis]
                            confidence_matrix[index_y_d_in + 0:index_y_u_in - margin,
                                              index_x_d_in + 0:index_x_u_in - margin] = \
                                label_p_pred[indexer_inside_batch,
                                             0:-margin or None,
                                             0:-margin or None,
                                             1]
                        elif i_batch == nxf - 1 and j_batch == nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                            index_x_d_in + margin:index_x_u_in - 0] = \
                                seg_in[margin:,
                                       margin:,
                                       np.newaxis]
                            confidence_matrix[index_y_d_in + margin:index_y_u_in - 0,
                                              index_x_d_in + margin:index_x_u_in - 0] = \
                                label_p_pred[indexer_inside_batch,
                                             margin:,
                                             margin:,
                                             1]
                        elif i_batch == 0 and j_batch == nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                            index_x_d_in + 0:index_x_u_in - margin] = \
                                seg_in[margin:,
                                       0:-margin or None,
                                       np.newaxis]
                            confidence_matrix[index_y_d_in + margin:index_y_u_in - 0,
                                              index_x_d_in + 0:index_x_u_in - margin] = \
                                label_p_pred[indexer_inside_batch,
                                             margin:,
                                             0:-margin or None,
                                             1]
                        elif i_batch == nxf - 1 and j_batch == 0:
                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - 0] = \
                                seg_in[0:-margin or None,
                                       margin:,
                                       np.newaxis]
                            confidence_matrix[index_y_d_in + 0:index_y_u_in - margin,
                                              index_x_d_in + margin:index_x_u_in - 0] = \
                                label_p_pred[indexer_inside_batch,
                                             0:-margin or None,
                                             margin:,
                                             1]
                        elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                            index_x_d_in + 0:index_x_u_in - margin] = \
                                seg_in[margin:-margin or None,
                                       0:-margin or None,
                                       np.newaxis]
                            confidence_matrix[index_y_d_in + margin:index_y_u_in - margin,
                                              index_x_d_in + 0:index_x_u_in - margin] = \
                                label_p_pred[indexer_inside_batch,
                                             margin:-margin or None,
                                             0:-margin or None,
                                             1]
                        elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - 0] = \
                                seg_in[margin:-margin or None,
                                       margin:,
                                       np.newaxis]
                            confidence_matrix[index_y_d_in + margin:index_y_u_in - margin,
                                              index_x_d_in + margin:index_x_u_in - 0] = \
                                label_p_pred[indexer_inside_batch,
                                             margin:-margin or None,
                                             margin:,
                                             1]
                        elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0:
                            prediction_true[index_y_d_in + 0:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - margin] = \
                                seg_in[0:-margin or None,
                                       margin:-margin or None,
                                       np.newaxis]
                            confidence_matrix[index_y_d_in + 0:index_y_u_in - margin,
                                              index_x_d_in + margin:index_x_u_in - margin] = \
                                label_p_pred[indexer_inside_batch,
                                             0:-margin or None,
                                             margin:-margin or None,
                                             1]
                        elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1:
                            prediction_true[index_y_d_in + margin:index_y_u_in - 0,
                                            index_x_d_in + margin:index_x_u_in - margin] = \
                                seg_in[margin:,
                                       margin:-margin or None,
                                       np.newaxis]
                            confidence_matrix[index_y_d_in + margin:index_y_u_in - 0,
                                              index_x_d_in + margin:index_x_u_in - margin] = \
                                label_p_pred[indexer_inside_batch,
                                             margin:,
                                             margin:-margin or None,
                                             1]
                        else:
                            prediction_true[index_y_d_in + margin:index_y_u_in - margin,
                                            index_x_d_in + margin:index_x_u_in - margin] = \
                                seg_in[margin:-margin or None,
                                       margin:-margin or None,
                                       np.newaxis]
                            confidence_matrix[index_y_d_in + margin:index_y_u_in - margin,
                                              index_x_d_in + margin:index_x_u_in - margin] = \
                                label_p_pred[indexer_inside_batch,
                                             margin:-margin or None,
                                             margin:-margin or None,
                                             1]
                        indexer_inside_batch += 1

                    list_i_s = []
                    list_j_s = []
                    list_x_u = []
                    list_x_d = []
                    list_y_u = []
                    list_y_d = []

                    batch_indexer = 0
                    img_patch[:] = 0

        prediction_true = prediction_true.astype(np.uint8)
        gc.collect()
        return prediction_true, confidence_matrix

    def extract_page(self):
        self.logger.debug("enter extract_page")
        cont_page = []
        if not self.ignore_page_extraction:
            img = cv2.GaussianBlur(self.image, (5, 5), 0)
            img_page_prediction = self.do_prediction(False, img, self.model_page)
            imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
            _, thresh = cv2.threshold(imgray, 0, 255, 0)
            thresh = cv2.dilate(thresh, KERNEL, iterations=3)
            contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

            if len(contours) > 0:
                cnt_size = np.array([cv2.contourArea(contours[j])
                                     for j in range(len(contours))])
                cnt = contours[np.argmax(cnt_size)]
                x, y, w, h = cv2.boundingRect(cnt)
                # snap the detected box to the page border when it comes
                # within 30 px of an edge
                if x <= 30:
                    w += x
                    x = 0
                if (self.image.shape[1] - (x + w)) <= 30:
                    w = w + (self.image.shape[1] - (x + w))
                if y <= 30:
                    h += y
                    y = 0
                if (self.image.shape[0] - (y + h)) <= 30:
                    h = h + (self.image.shape[0] - (y + h))
                box = [x, y, w, h]
            else:
                box = [0, 0, img.shape[1], img.shape[0]]
            cropped_page, page_coord = crop_image_inside_box(box, self.image)
            cont_page.append(np.array([[page_coord[2], page_coord[0]],
                                       [page_coord[3], page_coord[0]],
                                       [page_coord[3], page_coord[1]],
                                       [page_coord[2], page_coord[1]]]))
            self.logger.debug("exit extract_page")
        else:
            box = [0, 0, self.image.shape[1], self.image.shape[0]]
            cropped_page, page_coord = crop_image_inside_box(box, self.image)
            cont_page.append(np.array([[page_coord[2], page_coord[0]],
                                       [page_coord[3], page_coord[0]],
                                       [page_coord[3], page_coord[1]],
                                       [page_coord[2], page_coord[1]]]))
        return cropped_page, page_coord, cont_page
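
    # Coordinate conventions here, inferred from usage: box is [x, y, w, h]
    # as returned by cv2.boundingRect, while page_coord from
    # crop_image_inside_box is [y_top, y_bottom, x_left, x_right], i.e. a
    # crop reads img[page_coord[0]:page_coord[1], page_coord[2]:page_coord[3]]
    # and cont_page stores the page outline as (x, y) corner points.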

    def early_page_for_num_of_column_classification(self, img_bin):
        if not self.ignore_page_extraction:
            self.logger.debug("enter early_page_for_num_of_column_classification")
            if self.input_binary:
                img = np.copy(img_bin).astype(np.uint8)
            else:
                img = self.imread()
            img = cv2.GaussianBlur(img, (5, 5), 0)
            img_page_prediction = self.do_prediction(False, img, self.model_page)

            imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
            _, thresh = cv2.threshold(imgray, 0, 255, 0)
            thresh = cv2.dilate(thresh, KERNEL, iterations=3)
            contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            if len(contours) > 0:
                cnt_size = np.array([cv2.contourArea(contours[j])
                                     for j in range(len(contours))])
                cnt = contours[np.argmax(cnt_size)]
                box = cv2.boundingRect(cnt)
            else:
                box = [0, 0, img.shape[1], img.shape[0]]
            cropped_page, page_coord = crop_image_inside_box(box, img)

            self.logger.debug("exit early_page_for_num_of_column_classification")
        else:
            img = self.imread()
            box = [0, 0, img.shape[1], img.shape[0]]
            cropped_page, page_coord = crop_image_inside_box(box, img)
        return cropped_page, page_coord

    def extract_text_regions_new(self, img, patches, cols):
        self.logger.debug("enter extract_text_regions_new")
        img_height_h = img.shape[0]
        img_width_h = img.shape[1]
        model_region = self.model_region_fl if patches else self.model_region_fl_np

        if self.light_version:
            pass
        elif not patches:
            img = otsu_copy_binary(img).astype(np.uint8)
            prediction_regions = None
        elif cols:
            img = otsu_copy_binary(img).astype(np.uint8)
            if cols == 1:
                img = resize_image(img, int(img_height_h * 1000 / float(img_width_h)), 1000).astype(np.uint8)
            elif cols == 2:
                img = resize_image(img, int(img_height_h * 1300 / float(img_width_h)), 1300).astype(np.uint8)
            elif cols == 3:
                img = resize_image(img, int(img_height_h * 1600 / float(img_width_h)), 1600).astype(np.uint8)
            elif cols == 4:
                img = resize_image(img, int(img_height_h * 1900 / float(img_width_h)), 1900).astype(np.uint8)
            elif cols == 5:
                img = resize_image(img, int(img_height_h * 2200 / float(img_width_h)), 2200).astype(np.uint8)
            else:
                img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500).astype(np.uint8)

        prediction_regions = self.do_prediction(patches, img, model_region,
                                                marginal_of_patch_percent=0.1, n_batch_inference=3)
        prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h)
        self.logger.debug("exit extract_text_regions_new")
        return prediction_regions, prediction_regions

    def extract_text_regions(self, img, patches, cols):
        self.logger.debug("enter extract_text_regions")
        img_height_h = img.shape[0]
        img_width_h = img.shape[1]
        model_region = self.model_region_fl if patches else self.model_region_fl_np

        if not patches:
            img = otsu_copy_binary(img)
            img = img.astype(np.uint8)
            prediction_regions2 = None
        elif cols:
            if cols == 1:
                img_height_new = int(img_height_h * 0.7)
                img_width_new = int(img_width_h * 0.7)
            elif cols == 2:
                img_height_new = int(img_height_h * 0.4)
                img_width_new = int(img_width_h * 0.4)
            else:
                img_height_new = int(img_height_h * 0.3)
                img_width_new = int(img_width_h * 0.3)
            img2 = otsu_copy_binary(img)
            img2 = img2.astype(np.uint8)
            img2 = resize_image(img2, img_height_new, img_width_new)
            prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=0.1)
            prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h)

            img = otsu_copy_binary(img).astype(np.uint8)
            if cols == 1:
                img = resize_image(img, int(img_height_h * 0.5), int(img_width_h * 0.5)).astype(np.uint8)
            elif cols == 2 and img_width_h >= 2000:
                img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8)
            elif cols == 3 and ((self.scale_x == 1 and img_width_h > 3000) or
                                (self.scale_x != 1 and img_width_h > 2800)):
                img = resize_image(img, 2800 * img_height_h // img_width_h, 2800).astype(np.uint8)
            elif cols == 4 and ((self.scale_x == 1 and img_width_h > 4000) or
                                (self.scale_x != 1 and img_width_h > 3700)):
                img = resize_image(img, 3700 * img_height_h // img_width_h, 3700).astype(np.uint8)
            elif cols == 4:
                img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8)
            elif cols == 5 and self.scale_x == 1 and img_width_h > 5000:
                img = resize_image(img, int(img_height_h * 0.7), int(img_width_h * 0.7)).astype(np.uint8)
            elif cols == 5:
                img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8)
            elif img_width_h > 5600:
                img = resize_image(img, 5600 * img_height_h // img_width_h, 5600).astype(np.uint8)
            else:
                img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)).astype(np.uint8)

        prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1)
        prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h)
        self.logger.debug("exit extract_text_regions")
        return prediction_regions, prediction_regions2

    def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot,
                                         image_page_rotated, boxes, slope_deskew):
        polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot, 1, 0.00001)
        M_main_tot = [cv2.moments(polygons_of_textlines[j])
                      for j in range(len(polygons_of_textlines))]
        cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))]
        cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))]

        args_textlines = np.array(range(len(polygons_of_textlines)))
        all_found_textline_polygons = []
        slopes = []
        all_box_coord = []

        for index, con_region_ind in enumerate(contours_par):
            results = [cv2.pointPolygonTest(con_region_ind, (cx_main_tot[ind], cy_main_tot[ind]), False)
                       for ind in args_textlines]
            results = np.array(results)
            indexes_in = args_textlines[results == 1]
            textlines_ins = [polygons_of_textlines[ind] for ind in indexes_in]

            all_found_textline_polygons.append(textlines_ins[::-1])
            slopes.append(slope_deskew)

            _, crop_coor = crop_image_inside_box(boxes[index], image_page_rotated)
            all_box_coord.append(crop_coor)

        return (all_found_textline_polygons, boxes, contours, contours_par,
                all_box_coord, np.array(range(len(contours_par))), slopes)
    def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
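        """Compute per-region slopes and deskewed textlines in parallel (light version)."""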
        if not len(contours):
            return [], [], [], [], [], [], []
        self.logger.debug("enter get_slopes_and_deskew_new_light")
        results = self.executor.map(partial(do_work_of_slopes_new_light,
                                            textline_mask_tot_ea=textline_mask_tot,
                                            image_page_rotated=image_page_rotated,
                                            slope_deskew=slope_deskew,
                                            textline_light=self.textline_light,
                                            logger=self.logger,),
                                    boxes, contours, contours_par, range(len(contours_par)))
        #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results)
        self.logger.debug("exit get_slopes_and_deskew_new_light")
        return tuple(zip(*results))

    def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
        if not len(contours):
            return [], [], [], [], [], [], []
        self.logger.debug("enter get_slopes_and_deskew_new")
        results = self.executor.map(partial(do_work_of_slopes_new,
                                            textline_mask_tot_ea=textline_mask_tot,
                                            image_page_rotated=image_page_rotated,
                                            slope_deskew=slope_deskew,
                                            MAX_SLOPE=MAX_SLOPE,
                                            KERNEL=KERNEL,
                                            logger=self.logger,
                                            plotter=self.plotter,),
                                    boxes, contours, contours_par, range(len(contours_par)))
        #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results)
        self.logger.debug("exit get_slopes_and_deskew_new")
        return tuple(zip(*results))

    def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew):
        if not len(contours):
            return [], [], [], [], [], [], []
        self.logger.debug("enter get_slopes_and_deskew_new_curved")
        results = self.executor.map(partial(do_work_of_slopes_new_curved,
                                            textline_mask_tot_ea=textline_mask_tot,
                                            image_page_rotated=image_page_rotated,
                                            mask_texts_only=mask_texts_only,
                                            num_col=num_col,
                                            scale_par=scale_par,
                                            slope_deskew=slope_deskew,
                                            MAX_SLOPE=MAX_SLOPE,
                                            KERNEL=KERNEL,
                                            logger=self.logger,
                                            plotter=self.plotter,),
                                    boxes, contours, contours_par, range(len(contours_par)))
        #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results)
        self.logger.debug("exit get_slopes_and_deskew_new_curved")
        return tuple(zip(*results))

    def textline_contours(self, img, use_patches, scaler_h, scaler_w, num_col_classifier=None):
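        """Predict the textline mask of an image at the given scale.

        Returns two binary masks resized to the original image size: the
        patch-wise textline prediction and a whole-image ("long shot")
        prediction. Unless textline_light is set, the textline class (1) is
        dilated while the artificial-boundary class (2) is preserved on top.
        """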
        self.logger.debug('enter textline_contours')

        #img = img.astype(np.uint8)
        img_org = np.copy(img)
        img_h = img_org.shape[0]
        img_w = img_org.shape[1]
        img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w))

        prediction_textline = self.do_prediction(
            use_patches, img, self.model_textline,
            marginal_of_patch_percent=0.15, n_batch_inference=3,
            thresholding_for_artificial_class_in_light_version=self.textline_light)
        #if not self.textline_light:
        #if num_col_classifier==1:
        #prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline)
        #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0

        prediction_textline = resize_image(prediction_textline, img_h, img_w)
        textline_mask_tot_ea_art = (prediction_textline[:,:] == 2)*1

        old_art = np.copy(textline_mask_tot_ea_art)
        if not self.textline_light:
            textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8')
            #textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1)
            prediction_textline[:,:][textline_mask_tot_ea_art[:,:] == 1] = 2

        textline_mask_tot_ea_lines = (prediction_textline[:,:] == 1)*1
        textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8')
        if not self.textline_light:
            textline_mask_tot_ea_lines = cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1)

        prediction_textline[:,:][textline_mask_tot_ea_lines[:,:] == 1] = 1
        if not self.textline_light:
            prediction_textline[:,:][old_art[:,:] == 1] = 2

        prediction_textline_longshot = self.do_prediction(False, img, self.model_textline)
        prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w)

        self.logger.debug('exit textline_contours')
        return ((prediction_textline[:, :, 0] == 1).astype(np.uint8),
                (prediction_textline_longshot_true_size[:, :, 0] == 1).astype(np.uint8))

    def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process):
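        """Worker for queue-based slope estimation.

        For every box, the textline mask is cropped and eroded, a deskew slope
        is estimated with return_deskew_slop using a sigma derived from the
        mean vertical distance of the textline contours (falling back to 0 on
        failure), and the post-processed textline contours are collected. The
        results are pushed to the q, poly and box_sub queues.
        """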
        self.logger.debug('enter do_work_of_slopes')
        slope_biggest = 0
        slopes_sub = []
        boxes_sub_new = []
        poly_sub = []
        for mv in range(len(boxes_per_process)):
            crop_img, _ = crop_image_inside_box(boxes_per_process[mv],
                                                np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2))
            crop_img = crop_img[:, :, 0]
            crop_img = cv2.erode(crop_img, KERNEL, iterations=2)
            try:
                textline_con, hierarchy = return_contours_of_image(crop_img)
                textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierarchy,
                                                                 max_area=1, min_area=0.0008)
                y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
                sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0)))
                crop_img[crop_img > 0] = 1
                slope_corresponding_textregion = return_deskew_slop(crop_img, sigma_des,
                                                                    map=self.executor.map, logger=self.logger,
                                                                    plotter=self.plotter)
            except Exception as why:
                self.logger.error(why)
                slope_corresponding_textregion = MAX_SLOPE

            if slope_corresponding_textregion == MAX_SLOPE:
                slope_corresponding_textregion = slope_biggest
            slopes_sub.append(slope_corresponding_textregion)

            cnt_clean_rot = textline_contours_postprocessing(
                crop_img, slope_corresponding_textregion, contours_per_process[mv], boxes_per_process[mv])

            poly_sub.append(cnt_clean_rot)
            boxes_sub_new.append(boxes_per_process[mv])

        q.put(slopes_sub)
        poly.put(poly_sub)
        box_sub.put(boxes_sub_new)
        self.logger.debug('exit do_work_of_slopes')

    def get_regions_light_v_extract_only_images(self, img, is_image_enhanced, num_col_classifier):
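        """Light-version segmentation used when only image regions are wanted.

        Region classes are predicted at a column-dependent width, cropped to
        the page, and converted into a layout map; image regions smaller than
        150 pixels in either dimension are discarded, the rest are returned as
        rectangular polygons.
        """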
        self.logger.debug("enter get_regions_extract_images_only")
        erosion_hurts = False
        img_org = np.copy(img)
        img_height_h = img_org.shape[0]
        img_width_h = img_org.shape[1]

        if num_col_classifier == 1:
            img_w_new = 700
        elif num_col_classifier == 2:
            img_w_new = 900
        elif num_col_classifier == 3:
            img_w_new = 1500
        elif num_col_classifier == 4:
            img_w_new = 1800
        elif num_col_classifier == 5:
            img_w_new = 2200
        elif num_col_classifier == 6:
            img_w_new = 2500
        img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new)
        img_resized = resize_image(img, img_h_new, img_w_new)

        prediction_regions_org, _ = self.do_prediction_new_concept(True, img_resized, self.model_region)

        prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h)
        image_page, page_coord, cont_page = self.extract_page()

        prediction_regions_org = prediction_regions_org[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
        prediction_regions_org = prediction_regions_org[:,:,0]

        mask_lines_only = (prediction_regions_org[:,:] == 3)*1
        mask_texts_only = (prediction_regions_org[:,:] == 1)*1
        mask_images_only = (prediction_regions_org[:,:] == 2)*1

        polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
        polygons_lines_xml = filter_contours_area_of_image(
            mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)

        polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001)
        polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001)

        text_regions_p_true = np.zeros(prediction_regions_org.shape)
        text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3,3,3))

        text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2
        text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1,1,1))

        text_regions_p_true[text_regions_p_true.shape[0]-15:text_regions_p_true.shape[0], :] = 0
        text_regions_p_true[:, text_regions_p_true.shape[1]-15:text_regions_p_true.shape[1]] = 0

        ##polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.0001)
        polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.001)
        image_boundary_of_doc = np.zeros((text_regions_p_true.shape[0], text_regions_p_true.shape[1]))

        ###image_boundary_of_doc[:6, :] = 1
        ###image_boundary_of_doc[text_regions_p_true.shape[0]-6:text_regions_p_true.shape[0], :] = 1

        ###image_boundary_of_doc[:, :6] = 1
        ###image_boundary_of_doc[:, text_regions_p_true.shape[1]-6:text_regions_p_true.shape[1]] = 1

        polygons_of_images_fin = []
        for ploy_img_ind in polygons_of_images:
            """
            test_poly_image = np.zeros((text_regions_p_true.shape[0], text_regions_p_true.shape[1]))
            test_poly_image = cv2.fillPoly(test_poly_image, pts=[ploy_img_ind], color=(1,1,1))

            test_poly_image = test_poly_image + image_boundary_of_doc
            test_poly_image_intersected_area = ( test_poly_image[:,:]==2 )*1

            test_poly_image_intersected_area = test_poly_image_intersected_area.sum()

            if test_poly_image_intersected_area==0:
                ##polygons_of_images_fin.append(ploy_img_ind)

                box = cv2.boundingRect(ploy_img_ind)
                _, page_coord_img = crop_image_inside_box(box, text_regions_p_true)
                # cont_page.append(np.array([[page_coord[2], page_coord[0]],
                #                            [page_coord[3], page_coord[0]],
                #                            [page_coord[3], page_coord[1]],
                #                            [page_coord[2], page_coord[1]]]))
                polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]],
                                                        [page_coord_img[3], page_coord_img[0]],
                                                        [page_coord_img[3], page_coord_img[1]],
                                                        [page_coord_img[2], page_coord_img[1]]]))
            """
            box = x, y, w, h = cv2.boundingRect(ploy_img_ind)
            if h < 150 or w < 150:
                pass
            else:
                _, page_coord_img = crop_image_inside_box(box, text_regions_p_true)
                # cont_page.append(np.array([[page_coord[2], page_coord[0]],
                #                            [page_coord[3], page_coord[0]],
                #                            [page_coord[3], page_coord[1]],
                #                            [page_coord[2], page_coord[1]]]))
                polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]],
                                                        [page_coord_img[3], page_coord_img[0]],
                                                        [page_coord_img[3], page_coord_img[1]],
                                                        [page_coord_img[2], page_coord_img[1]]]))

        self.logger.debug("exit get_regions_extract_images_only")
        return (text_regions_p_true, erosion_hurts, polygons_lines_xml,
                polygons_of_images_fin, image_page, page_coord, cont_page)

    def get_regions_light_v(self, img, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False):
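        """Light-version region segmentation.

        The image is rescaled by column count, optionally binarized (for OCR
        on non-binary input), textlines are detected, and region classes are
        predicted with the 1-2-column or multi-column model. If
        skip_layout_and_reading_order is set, only the textline mask and the
        (possibly binarized) image are returned.
        """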
        self.logger.debug("enter get_regions_light_v")
        t_in = time.time()
        erosion_hurts = False
        img_org = np.copy(img)
        img_height_h = img_org.shape[0]
        img_width_h = img_org.shape[1]

        #print(num_col_classifier,'num_col_classifier')

        if num_col_classifier == 1:
            img_w_new = 1000
        elif num_col_classifier == 2:
            img_w_new = 1500
        elif num_col_classifier == 3:
            img_w_new = 2000
        elif num_col_classifier == 4:
            img_w_new = 2500
        elif num_col_classifier == 5:
            img_w_new = 3000
        else:
            img_w_new = 4000
        img_h_new = img_w_new * img_org.shape[0] // img_org.shape[1]
        img_resized = resize_image(img, img_h_new, img_w_new)

        t_bin = time.time()
        #if (not self.input_binary) or self.full_layout:
        #if self.input_binary:
        #img_bin = np.copy(img_resized)
        ###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30):
        ###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5)

        ####print("inside bin ", time.time()-t_bin)
        ###prediction_bin = prediction_bin[:,:,0]
        ###prediction_bin = (prediction_bin[:,:]==0)*1
        ###prediction_bin = prediction_bin*255

        ###prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)

        ###prediction_bin = prediction_bin.astype(np.uint16)
        ####img = np.copy(prediction_bin)
        ###img_bin = np.copy(prediction_bin)
        ###else:
        ###img_bin = np.copy(img_resized)
        if self.ocr and not self.input_binary:
            prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5)
            prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
            prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
            prediction_bin = prediction_bin.astype(np.uint16)
            #img = np.copy(prediction_bin)
            img_bin = np.copy(prediction_bin)
        else:
            img_bin = np.copy(img_resized)
        #print("inside 1 ", time.time()-t_in)

        ###textline_mask_tot_ea = self.run_textline(img_bin)
        self.logger.debug("detecting textlines on %s with %d colors",
                          str(img_resized.shape), len(np.unique(img_resized)))
        textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier)
        textline_mask_tot_ea = resize_image(textline_mask_tot_ea, img_height_h, img_width_h)

        #print(self.image_org.shape)
        #cv2.imwrite('out_13.png', self.image_page_org_size)

        #plt.imshow(self.image_page_org_size)
        #plt.show()
        if not skip_layout_and_reading_order:
            #print("inside 2 ", time.time()-t_in)
            if num_col_classifier == 1 or num_col_classifier == 2:
                if self.image_org.shape[0] / self.image_org.shape[1] > 2.5:
                    self.logger.debug("resized to %dx%d for %d cols",
                                      img_resized.shape[1], img_resized.shape[0], num_col_classifier)
                    prediction_regions_org, confidence_matrix = self.do_prediction_new_concept(
                        True, img_resized, self.model_region_1_2, n_batch_inference=1,
                        thresholding_for_some_classes_in_light_version=True)
                else:
                    prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3))
                    confidence_matrix = np.zeros((self.image_org.shape[0], self.image_org.shape[1]))
                    prediction_regions_page, confidence_matrix_page = self.do_prediction_new_concept(
                        False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1,
                        thresholding_for_artificial_class_in_light_version=True)
                    ys = slice(*self.page_coord[0:2])
                    xs = slice(*self.page_coord[2:4])
                    prediction_regions_org[ys, xs] = prediction_regions_page
                    confidence_matrix[ys, xs] = confidence_matrix_page

            else:
                new_h = 900 + (num_col_classifier - 3) * 100
                img_resized = resize_image(img_bin, int(new_h * img_bin.shape[0] / img_bin.shape[1]), new_h)
                self.logger.debug("resized to %dx%d (new_h=%d) for %d cols",
                                  img_resized.shape[1], img_resized.shape[0], new_h, num_col_classifier)
                prediction_regions_org, confidence_matrix = self.do_prediction_new_concept(
                    True, img_resized, self.model_region_1_2, n_batch_inference=2,
                    thresholding_for_some_classes_in_light_version=True)
            ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True)
            #print("inside 3 ", time.time()-t_in)
            #plt.imshow(prediction_regions_org[:,:,0])
            #plt.show()

            prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h)
            confidence_matrix = resize_image(confidence_matrix, img_height_h, img_width_h)
            img_bin = resize_image(img_bin, img_height_h, img_width_h)
            prediction_regions_org = prediction_regions_org[:,:,0]

            mask_lines_only = (prediction_regions_org[:,:] == 3)*1
            mask_texts_only = (prediction_regions_org[:,:] == 1)*1
            mask_texts_only = mask_texts_only.astype('uint8')

            ##if num_col_classifier == 1 or num_col_classifier == 2:
            ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1)
            ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1)

            mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1)
            mask_images_only = (prediction_regions_org[:,:] == 2)*1

            polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
            test_khat = np.zeros(prediction_regions_org.shape)
            test_khat = cv2.fillPoly(test_khat, pts=polygons_lines_xml, color=(1,1,1))

            #plt.imshow(test_khat[:,:])
            #plt.show()
            #for jv in range(1):
            #print(jv, hir_lines_xml[0][232][3])
            #test_khat = np.zeros(prediction_regions_org.shape)
            #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1))
            #plt.imshow(test_khat[:,:])
            #plt.show()

            polygons_lines_xml = filter_contours_area_of_image(
                mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)

            test_khat = np.zeros(prediction_regions_org.shape)
            test_khat = cv2.fillPoly(test_khat, pts=polygons_lines_xml, color=(1,1,1))

            #plt.imshow(test_khat[:,:])
            #plt.show()
            #sys.exit()

            polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001)
            ##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts)
            polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001)

            text_regions_p_true = np.zeros(prediction_regions_org.shape)
            text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3,3,3))

            text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2
            text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1,1,1))

            #plt.imshow(textline_mask_tot_ea)
            #plt.show()

            textline_mask_tot_ea[(text_regions_p_true == 0) | (text_regions_p_true == 4)] = 0

            #plt.imshow(textline_mask_tot_ea)
            #plt.show()
            #print("inside 4 ", time.time()-t_in)
            self.logger.debug("exit get_regions_light_v")
            return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin, confidence_matrix
        else:
            img_bin = resize_image(img_bin, img_height_h, img_width_h)
            self.logger.debug("exit get_regions_light_v")
            return None, erosion_hurts, None, textline_mask_tot_ea, img_bin, None

    def get_regions_from_xy_2models(self, img, is_image_enhanced, num_col_classifier):
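        """Detect regions by combining two region models.

        A first patch-wise prediction (on a y-stretched image) is intersected
        with a second model's prediction; the intersected result is adopted
        unless the image is enhanced and too much text would be lost (ratio
        below RATIO_OF_TWO_MODEL_THRESHOLD). On failure, prediction falls back
        to a binarized copy of the input and erosion_hurts is set.
        """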
        self.logger.debug("enter get_regions_from_xy_2models")
        erosion_hurts = False
        img_org = np.copy(img)
        img_height_h = img_org.shape[0]
        img_width_h = img_org.shape[1]

        ratio_y = 1.3
        ratio_x = 1

        img = resize_image(img_org, int(img_org.shape[0] * ratio_y), int(img_org.shape[1] * ratio_x))
        prediction_regions_org_y = self.do_prediction(True, img, self.model_region)
        prediction_regions_org_y = resize_image(prediction_regions_org_y, img_height_h, img_width_h)

        #plt.imshow(prediction_regions_org_y[:,:,0])
        #plt.show()
        prediction_regions_org_y = prediction_regions_org_y[:,:,0]
        mask_zeros_y = (prediction_regions_org_y[:,:] == 0)*1

        ##img_only_regions_with_sep = ( (prediction_regions_org_y[:,:] != 3) & (prediction_regions_org_y[:,:] != 0) )*1
        img_only_regions_with_sep = (prediction_regions_org_y == 1).astype(np.uint8)
        try:
            img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=20)
            _, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
            img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1] * (1.2 if is_image_enhanced else 1)))

            prediction_regions_org = self.do_prediction(True, img, self.model_region)
            prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h)

            prediction_regions_org = prediction_regions_org[:,:,0]
            prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)] = 0

            img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]))

            prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, marginal_of_patch_percent=0.2)
            prediction_regions_org2 = resize_image(prediction_regions_org2, img_height_h, img_width_h)

            mask_zeros2 = (prediction_regions_org2[:,:,0] == 0)
            mask_lines2 = (prediction_regions_org2[:,:,0] == 3)
            text_sume_early = (prediction_regions_org[:,:] == 1).sum()
            prediction_regions_org_copy = np.copy(prediction_regions_org)
            prediction_regions_org_copy[(prediction_regions_org_copy[:,:] == 1) & (mask_zeros2[:,:] == 1)] = 0
            text_sume_second = ((prediction_regions_org_copy[:,:] == 1)*1).sum()
            rate_two_models = 100. * text_sume_second / text_sume_early

            self.logger.info("ratio_of_two_models: %s", rate_two_models)
            if not (is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD):
                prediction_regions_org = np.copy(prediction_regions_org_copy)

            prediction_regions_org[(mask_lines2[:,:] == 1) & (prediction_regions_org[:,:] == 0)] = 3
            mask_lines_only = (prediction_regions_org[:,:] == 3)*1
            prediction_regions_org = cv2.erode(prediction_regions_org[:,:], KERNEL, iterations=2)
            prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2)

            if rate_two_models <= 40:
                if self.input_binary:
                    prediction_bin = np.copy(img_org)
                else:
                    prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5)
                    prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h)
                    prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
                    prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)

                ratio_y = 1
                ratio_x = 1

                img = resize_image(prediction_bin, int(img_org.shape[0] * ratio_y), int(img_org.shape[1] * ratio_x))

                prediction_regions_org = self.do_prediction(True, img, self.model_region)
                prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h)
                prediction_regions_org = prediction_regions_org[:,:,0]

                mask_lines_only = (prediction_regions_org[:,:] == 3)*1

            mask_texts_only = (prediction_regions_org[:,:] == 1)*1
            mask_images_only = (prediction_regions_org[:,:] == 2)*1

            polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
            polygons_lines_xml = filter_contours_area_of_image(
                mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)

            polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001)
            polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001)

            text_regions_p_true = np.zeros(prediction_regions_org.shape)
            text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3, 3, 3))
            text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2

            text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1,1,1))

            self.logger.debug("exit get_regions_from_xy_2models")
            return text_regions_p_true, erosion_hurts, polygons_lines_xml
        except:
            if self.input_binary:
                prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5)
                prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h)
                prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
                prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
            else:
                prediction_bin = np.copy(img_org)
            ratio_y = 1
            ratio_x = 1

            img = resize_image(prediction_bin, int(img_org.shape[0] * ratio_y), int(img_org.shape[1] * ratio_x))
            prediction_regions_org = self.do_prediction(True, img, self.model_region)
            prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h)
            prediction_regions_org = prediction_regions_org[:,:,0]

            #mask_lines_only = (prediction_regions_org[:,:]==3)*1
            #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))

            #prediction_regions_org = self.do_prediction(True, img, self.model_region)

            #prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h)

            #prediction_regions_org = prediction_regions_org[:,:,0]

            #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0

            mask_lines_only = (prediction_regions_org == 3)*1
            mask_texts_only = (prediction_regions_org == 1)*1
            mask_images_only = (prediction_regions_org == 2)*1

            polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
            polygons_lines_xml = filter_contours_area_of_image(
                mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)

            polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001)
            polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001)

            text_regions_p_true = np.zeros(prediction_regions_org.shape)
            text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3,3,3))

            text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2
            text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1,1,1))

            erosion_hurts = True
            self.logger.debug("exit get_regions_from_xy_2models")
            return text_regions_p_true, erosion_hurts, polygons_lines_xml

    def do_order_of_regions_full_layout(
            self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot):
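        """Determine the reading order of main text and heading regions.

        Each region is assigned to one of the column boxes, preferably by its
        leftmost point (with an 80px tolerance), falling back to the nearest
        box; the regions of each box are then ordered. If that fails, the same
        procedure is repeated using the region centers instead.
        """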
        self.logger.debug("enter do_order_of_regions_full_layout")
        boxes = np.array(boxes, dtype=int)  # to be on the safe side
        cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours(
            contours_only_text_parent)
        cx_text_only_h, cy_text_only_h, x_min_text_only_h, _, _, _, y_cor_x_min_main_h = find_new_features_of_contours(
            contours_only_text_parent_h)

        try:
            arg_text_con = []
            for ii in range(len(cx_text_only)):
                check_if_textregion_located_in_a_box = False
                for jj in range(len(boxes)):
                    if (x_min_text_only[ii] + 80 >= boxes[jj][0] and
                        x_min_text_only[ii] + 80 < boxes[jj][1] and
                        y_cor_x_min_main[ii] >= boxes[jj][2] and
                        y_cor_x_min_main[ii] < boxes[jj][3]):
                        arg_text_con.append(jj)
                        check_if_textregion_located_in_a_box = True
                        break
                if not check_if_textregion_located_in_a_box:
                    dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 +
                                                   (cy_text_only[ii] - boxes[jj][2]) ** 2)
                                         for jj in range(len(boxes))]
                    ind_min = np.argmin(dists_tr_from_box)
                    arg_text_con.append(ind_min)
            args_contours = np.array(range(len(arg_text_con)))
            arg_text_con_h = []
            for ii in range(len(cx_text_only_h)):
                check_if_textregion_located_in_a_box = False
                for jj in range(len(boxes)):
                    if (x_min_text_only_h[ii] + 80 >= boxes[jj][0] and
                        x_min_text_only_h[ii] + 80 < boxes[jj][1] and
                        y_cor_x_min_main_h[ii] >= boxes[jj][2] and
                        y_cor_x_min_main_h[ii] < boxes[jj][3]):
                        arg_text_con_h.append(jj)
                        check_if_textregion_located_in_a_box = True
                        break
                if not check_if_textregion_located_in_a_box:
                    dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 +
                                                   (cy_text_only_h[ii] - boxes[jj][2]) ** 2)
                                         for jj in range(len(boxes))]
                    ind_min = np.argmin(dists_tr_from_box)
                    arg_text_con_h.append(ind_min)
            args_contours_h = np.array(range(len(arg_text_con_h)))

            order_by_con_head = np.zeros(len(arg_text_con_h))
            order_by_con_main = np.zeros(len(arg_text_con))

            ref_point = 0
            order_of_texts_tot = []
            id_of_texts_tot = []
            for iij in range(len(boxes)):
                ys = slice(*boxes[iij][2:4])
                xs = slice(*boxes[iij][0:2])
                args_contours_box = args_contours[np.array(arg_text_con) == iij]
                args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij]
                con_inter_box = []
                con_inter_box_h = []

                for box in args_contours_box:
                    con_inter_box.append(contours_only_text_parent[box])

                for box in args_contours_box_h:
                    con_inter_box_h.append(contours_only_text_parent_h[box])

                indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
                    textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2])

                order_of_texts, id_of_texts = order_and_id_of_texts(
                    con_inter_box, con_inter_box_h,
                    matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)

                indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1]
                indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1]
                indexes_sorted_head = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 2]
                indexes_by_type_head = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 2]

                for zahler, _ in enumerate(args_contours_box):
                    arg_order_v = indexes_sorted_main[zahler]
                    order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \
                        np.where(indexes_sorted == arg_order_v)[0][0] + ref_point

                for zahler, _ in enumerate(args_contours_box_h):
                    arg_order_v = indexes_sorted_head[zahler]
                    order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \
                        np.where(indexes_sorted == arg_order_v)[0][0] + ref_point

                for jji in range(len(id_of_texts)):
                    order_of_texts_tot.append(order_of_texts[jji] + ref_point)
                    id_of_texts_tot.append(id_of_texts[jji])
                ref_point += len(id_of_texts)

            order_of_texts_tot = []
            for tj1 in range(len(contours_only_text_parent)):
                order_of_texts_tot.append(int(order_by_con_main[tj1]))

            for tj1 in range(len(contours_only_text_parent_h)):
                order_of_texts_tot.append(int(order_by_con_head[tj1]))

            order_text_new = []
            for iii in range(len(order_of_texts_tot)):
                order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0])

        except Exception as why:
            self.logger.error(why)
            arg_text_con = []
            for ii in range(len(cx_text_only)):
                check_if_textregion_located_in_a_box = False
                for jj in range(len(boxes)):
                    if (cx_text_only[ii] >= boxes[jj][0] and
                        cx_text_only[ii] < boxes[jj][1] and
                        cy_text_only[ii] >= boxes[jj][2] and
                        cy_text_only[ii] < boxes[jj][3]):
                        # identify the box that contains the center of this region
                        arg_text_con.append(jj)
                        check_if_textregion_located_in_a_box = True
                        break

                if not check_if_textregion_located_in_a_box:
                    dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 +
                                                   (cy_text_only[ii] - boxes[jj][2]) ** 2)
                                         for jj in range(len(boxes))]
                    ind_min = np.argmin(dists_tr_from_box)
                    arg_text_con.append(ind_min)
            args_contours = np.array(range(len(arg_text_con)))
            order_by_con_main = np.zeros(len(arg_text_con))

            ############################# head

            arg_text_con_h = []
            for ii in range(len(cx_text_only_h)):
                check_if_textregion_located_in_a_box = False
                for jj in range(len(boxes)):
                    if (cx_text_only_h[ii] >= boxes[jj][0] and
                        cx_text_only_h[ii] < boxes[jj][1] and
                        cy_text_only_h[ii] >= boxes[jj][2] and
                        cy_text_only_h[ii] < boxes[jj][3]):
                        # identify the box that contains the center of this region
                        arg_text_con_h.append(jj)
                        check_if_textregion_located_in_a_box = True
                        break
                if not check_if_textregion_located_in_a_box:
                    dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 +
                                                   (cy_text_only_h[ii] - boxes[jj][2]) ** 2)
                                         for jj in range(len(boxes))]
                    ind_min = np.argmin(dists_tr_from_box)
                    arg_text_con_h.append(ind_min)
            args_contours_h = np.array(range(len(arg_text_con_h)))
            order_by_con_head = np.zeros(len(arg_text_con_h))

            ref_point = 0
            order_of_texts_tot = []
            id_of_texts_tot = []
            for iij, _ in enumerate(boxes):
                ys = slice(*boxes[iij][2:4])
                xs = slice(*boxes[iij][0:2])
                args_contours_box = args_contours[np.array(arg_text_con) == iij]
                args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij]
                con_inter_box = []
                con_inter_box_h = []

                for box in args_contours_box:
                    con_inter_box.append(contours_only_text_parent[box])

                for box in args_contours_box_h:
                    con_inter_box_h.append(contours_only_text_parent_h[box])

                indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
                    textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2])

                order_of_texts, id_of_texts = order_and_id_of_texts(
                    con_inter_box, con_inter_box_h,
                    matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)

                indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1]
                indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1]
                indexes_sorted_head = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 2]
                indexes_by_type_head = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 2]

                for zahler, _ in enumerate(args_contours_box):
                    arg_order_v = indexes_sorted_main[zahler]
                    order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \
                        np.where(indexes_sorted == arg_order_v)[0][0] + ref_point

                for zahler, _ in enumerate(args_contours_box_h):
                    arg_order_v = indexes_sorted_head[zahler]
                    order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \
                        np.where(indexes_sorted == arg_order_v)[0][0] + ref_point

                for jji, _ in enumerate(id_of_texts):
                    order_of_texts_tot.append(order_of_texts[jji] + ref_point)
                    id_of_texts_tot.append(id_of_texts[jji])
                ref_point += len(id_of_texts)

            order_of_texts_tot = []
            for tj1 in range(len(contours_only_text_parent)):
                order_of_texts_tot.append(int(order_by_con_main[tj1]))

            for tj1 in range(len(contours_only_text_parent_h)):
                order_of_texts_tot.append(int(order_by_con_head[tj1]))

            order_text_new = []
            for iii in range(len(order_of_texts_tot)):
                order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0])

        self.logger.debug("exit do_order_of_regions_full_layout")
        return order_text_new, id_of_texts_tot

    def do_order_of_regions_no_full_layout(
            self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot):
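        """Reading order for the non-full-layout case: like the full-layout
        variant, but only main text regions are assigned to boxes and ordered.
        """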
        self.logger.debug("enter do_order_of_regions_no_full_layout")
        boxes = np.array(boxes, dtype=int)  # to be on the safe side
        cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours(
            contours_only_text_parent)

        try:
            arg_text_con = []
            for ii in range(len(cx_text_only)):
                check_if_textregion_located_in_a_box = False
                for jj in range(len(boxes)):
                    if (x_min_text_only[ii] + 80 >= boxes[jj][0] and
                        x_min_text_only[ii] + 80 < boxes[jj][1] and
                        y_cor_x_min_main[ii] >= boxes[jj][2] and
                        y_cor_x_min_main[ii] < boxes[jj][3]):
                        arg_text_con.append(jj)
                        check_if_textregion_located_in_a_box = True
                        break
                if not check_if_textregion_located_in_a_box:
                    dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 +
                                                   (cy_text_only[ii] - boxes[jj][2]) ** 2)
                                         for jj in range(len(boxes))]
                    ind_min = np.argmin(dists_tr_from_box)
                    arg_text_con.append(ind_min)
            args_contours = np.array(range(len(arg_text_con)))
            order_by_con_main = np.zeros(len(arg_text_con))

            ref_point = 0
            order_of_texts_tot = []
            id_of_texts_tot = []
            for iij in range(len(boxes)):
                ys = slice(*boxes[iij][2:4])
                xs = slice(*boxes[iij][0:2])
                args_contours_box = args_contours[np.array(arg_text_con) == iij]
                con_inter_box = []
                con_inter_box_h = []
                for i in range(len(args_contours_box)):
                    con_inter_box.append(contours_only_text_parent[args_contours_box[i]])

                indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
                    textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2])

                order_of_texts, id_of_texts = order_and_id_of_texts(
                    con_inter_box, con_inter_box_h,
                    matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)

                indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1]
                indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1]

                for zahler, _ in enumerate(args_contours_box):
                    arg_order_v = indexes_sorted_main[zahler]
                    order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \
                        np.where(indexes_sorted == arg_order_v)[0][0] + ref_point

                for jji, _ in enumerate(id_of_texts):
                    order_of_texts_tot.append(order_of_texts[jji] + ref_point)
                    id_of_texts_tot.append(id_of_texts[jji])
                ref_point += len(id_of_texts)

            order_of_texts_tot = []
            for tj1 in range(len(contours_only_text_parent)):
                order_of_texts_tot.append(int(order_by_con_main[tj1]))

            order_text_new = []
            for iii in range(len(order_of_texts_tot)):
                order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0])

        except Exception as why:
            self.logger.error(why)
            arg_text_con = []
            for ii in range(len(cx_text_only)):
                check_if_textregion_located_in_a_box = False
                for jj in range(len(boxes)):
                    if (cx_text_only[ii] >= boxes[jj][0] and
                        cx_text_only[ii] < boxes[jj][1] and
                        cy_text_only[ii] >= boxes[jj][2] and
                        cy_text_only[ii] < boxes[jj][3]):
                        # identify the box that contains the center of this region
                        arg_text_con.append(jj)
                        check_if_textregion_located_in_a_box = True
                        break
                if not check_if_textregion_located_in_a_box:
                    dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 +
                                                   (cy_text_only[ii] - boxes[jj][2]) ** 2)
                                         for jj in range(len(boxes))]
                    ind_min = np.argmin(dists_tr_from_box)
                    arg_text_con.append(ind_min)
            args_contours = np.array(range(len(arg_text_con)))
            order_by_con_main = np.zeros(len(arg_text_con))

            ref_point = 0
            order_of_texts_tot = []
            id_of_texts_tot = []
            for iij in range(len(boxes)):
                ys = slice(*boxes[iij][2:4])
                xs = slice(*boxes[iij][0:2])
                args_contours_box = args_contours[np.array(arg_text_con) == iij]
                con_inter_box = []
                con_inter_box_h = []
                for i in range(len(args_contours_box)):
                    con_inter_box.append(contours_only_text_parent[args_contours_box[i]])

                indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
                    textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2])

                order_of_texts, id_of_texts = order_and_id_of_texts(
                    con_inter_box, con_inter_box_h,
                    matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)

                indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1]
                indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1]

                for zahler, _ in enumerate(args_contours_box):
                    arg_order_v = indexes_sorted_main[zahler]
                    order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \
                        np.where(indexes_sorted == arg_order_v)[0][0] + ref_point

                for jji, _ in enumerate(id_of_texts):
                    order_of_texts_tot.append(order_of_texts[jji] + ref_point)
                    id_of_texts_tot.append(id_of_texts[jji])
                ref_point += len(id_of_texts)

            order_of_texts_tot = []

            for tj1 in range(len(contours_only_text_parent)):
                order_of_texts_tot.append(int(order_by_con_main[tj1]))

            order_text_new = []
            for iii in range(len(order_of_texts_tot)):
                order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0])

        self.logger.debug("exit do_order_of_regions_no_full_layout")
        return order_text_new, id_of_texts_tot

    def check_iou_of_bounding_box_and_contour_for_tables(
            self, layout, table_prediction_early, pixel_table, num_col_classifier):
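        """Validate table candidates by how well they fill their bounding box.

        Candidates covering less than 80% of their bounding box are split at
        smoothed column-projection peaks and re-checked; accepted contours are
        painted back into the layout with the table pixel value (on
        multi-column pages only if they overlap the early table prediction by
        more than 30%).
        """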
        layout_org = np.copy(layout)
        layout_org[:,:,0][layout_org[:,:,0] == pixel_table] = 0
        layout = (layout[:,:,0] == pixel_table)*1

        layout = np.repeat(layout[:, :, np.newaxis], 3, axis=2)
        layout = layout.astype(np.uint8)
        imgray = cv2.cvtColor(layout, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(imgray, 0, 255, 0)

        contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        cnt_size = np.array([cv2.contourArea(contours[j])
                             for j in range(len(contours))])

        contours_new = []
        for i in range(len(contours)):
            x, y, w, h = cv2.boundingRect(contours[i])
            iou = cnt_size[i] / float(w * h) * 100
            if iou < 80:
                layout_contour = np.zeros((layout_org.shape[0], layout_org.shape[1]))
                layout_contour = cv2.fillPoly(layout_contour, pts=[contours[i]], color=(1,1,1))

                layout_contour_sum = layout_contour.sum(axis=0)
                layout_contour_sum_diff = np.diff(layout_contour_sum)
                layout_contour_sum_diff = np.abs(layout_contour_sum_diff)
                layout_contour_sum_diff_smoothed = gaussian_filter1d(layout_contour_sum_diff, 10)

                peaks, _ = find_peaks(layout_contour_sum_diff_smoothed, height=0)
                peaks = peaks[layout_contour_sum_diff_smoothed[peaks] > 4]

                for j in range(len(peaks)):
                    layout_contour[:, peaks[j]-3+1:peaks[j]+1+3] = 0

                layout_contour = cv2.erode(layout_contour[:,:], KERNEL, iterations=5)
                layout_contour = cv2.dilate(layout_contour[:,:], KERNEL, iterations=5)

                layout_contour = np.repeat(layout_contour[:, :, np.newaxis], 3, axis=2)
                layout_contour = layout_contour.astype(np.uint8)

                imgray = cv2.cvtColor(layout_contour, cv2.COLOR_BGR2GRAY)
                _, thresh = cv2.threshold(imgray, 0, 255, 0)

                contours_sep, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

                for ji in range(len(contours_sep)):
                    contours_new.append(contours_sep[ji])
                    if num_col_classifier >= 2:
                        only_recent_contour_image = np.zeros((layout.shape[0], layout.shape[1]))
                        only_recent_contour_image = cv2.fillPoly(only_recent_contour_image,
                                                                 pts=[contours_sep[ji]], color=(1,1,1))
                        table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early
                        iou_in = 100. * table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum()
                        #print(iou_in,'iou_in_in1')

                        if iou_in > 30:
                            layout_org = cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,))
                        else:
                            pass
                    else:
                        layout_org = cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,))
            else:
                contours_new.append(contours[i])
                if num_col_classifier >= 2:
                    only_recent_contour_image = np.zeros((layout.shape[0], layout.shape[1]))
                    only_recent_contour_image = cv2.fillPoly(only_recent_contour_image,
                                                             pts=[contours[i]], color=(1,1,1))

                    table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early
                    iou_in = 100. * table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum()
                    #print(iou_in,'iou_in')
                    if iou_in > 30:
                        layout_org = cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,))
                    else:
                        pass
                else:
                    layout_org = cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,))

        return layout_org, contours_new

    def delete_separator_around(self, spliter_y, peaks_neg, image_by_region, pixel_line, pixel_table):
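        """Erase separator and table pixels in a ±100px band around each
        column peak, within each vertical slice of the page.
        """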
        # format of subboxes: box = [x1, x2, y1, y2]
        pix_del = 100
        if len(image_by_region.shape) == 3:
            for i in range(len(spliter_y)-1):
                for j in range(1, len(peaks_neg[i])-1):
                    ys = slice(int(spliter_y[i]),
                               int(spliter_y[i+1]))
                    xs = slice(peaks_neg[i][j] - pix_del,
                               peaks_neg[i][j] + pix_del)
                    image_by_region[ys,xs,0][image_by_region[ys,xs,0] == pixel_line] = 0
                    image_by_region[ys,xs,0][image_by_region[ys,xs,1] == pixel_line] = 0
                    image_by_region[ys,xs,0][image_by_region[ys,xs,2] == pixel_line] = 0

                    image_by_region[ys,xs,0][image_by_region[ys,xs,0] == pixel_table] = 0
                    image_by_region[ys,xs,0][image_by_region[ys,xs,1] == pixel_table] = 0
                    image_by_region[ys,xs,0][image_by_region[ys,xs,2] == pixel_table] = 0
        else:
            for i in range(len(spliter_y)-1):
                for j in range(1, len(peaks_neg[i])-1):
                    ys = slice(int(spliter_y[i]),
                               int(spliter_y[i+1]))
                    xs = slice(peaks_neg[i][j] - pix_del,
                               peaks_neg[i][j] + pix_del)
                    image_by_region[ys,xs][image_by_region[ys,xs] == pixel_line] = 0
                    image_by_region[ys,xs][image_by_region[ys,xs] == pixel_table] = 0
        return image_by_region

    def add_tables_heuristic_to_layout(
            self, image_regions_eraly_p, boxes,
            slope_mean_hor, spliter_y, peaks_neg_tot, image_revised,
            num_col_classifier, min_area, pixel_line):
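        """Add table regions to the layout by heuristics.

        Separators near column peaks are erased first; per region class, small
        contours are filtered out, and if the mean horizontal slope is usable,
        tables and neighbouring rulings inside each box are fused into
        full-height table bands. For single-column pages, each table is
        extended over its full vertical extent.
        """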
        pixel_table = 10
        image_revised_1 = self.delete_separator_around(spliter_y, peaks_neg_tot, image_revised, pixel_line, pixel_table)

        try:
            image_revised_1[:,:30][image_revised_1[:,:30] == pixel_line] = 0
            image_revised_1[:,-30:][image_revised_1[:,-30:] == pixel_line] = 0
        except:
            pass
        boxes = np.array(boxes, dtype=int)  # to be on the safe side

        img_comm_e = np.zeros(image_revised_1.shape)
        img_comm = np.repeat(img_comm_e[:, :, np.newaxis], 3, axis=2)

        for indiv in np.unique(image_revised_1):
            image_col = (image_revised_1 == indiv)*255
            img_comm_in = np.repeat(image_col[:, :, np.newaxis], 3, axis=2)
            img_comm_in = img_comm_in.astype(np.uint8)

            imgray = cv2.cvtColor(img_comm_in, cv2.COLOR_BGR2GRAY)
            ret, thresh = cv2.threshold(imgray, 0, 255, 0)
            contours, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

            if indiv == pixel_table:
                main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy,
                                                                     max_area=1, min_area=0.001)
            else:
                main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy,
                                                                     max_area=1, min_area=min_area)

            img_comm = cv2.fillPoly(img_comm, pts=main_contours, color=(indiv, indiv, indiv))
            img_comm = img_comm.astype(np.uint8)

        if not self.isNaN(slope_mean_hor):
            image_revised_last = np.zeros((image_regions_eraly_p.shape[0], image_regions_eraly_p.shape[1], 3))
            for i in range(len(boxes)):
                box_ys = slice(*boxes[i][2:4])
                box_xs = slice(*boxes[i][0:2])
                image_box = img_comm[box_ys, box_xs]
                try:
                    image_box_tabels_1 = (image_box[:,:,0] == pixel_table)*1
                    contours_tab, _ = return_contours_of_image(image_box_tabels_1)
                    contours_tab = filter_contours_area_of_image_tables(image_box_tabels_1, contours_tab, _, 1, 0.003)
                    image_box_tabels_1 = (image_box[:,:,0] == pixel_line)*1

                    image_box_tabels_and_m_text = ((image_box[:,:,0] == pixel_table) | (image_box[:,:,0] == 1))*1
                    image_box_tabels_and_m_text = image_box_tabels_and_m_text.astype(np.uint8)

                    image_box_tabels_1 = image_box_tabels_1.astype(np.uint8)
                    image_box_tabels_1 = cv2.dilate(image_box_tabels_1, KERNEL, iterations=5)

                    contours_table_m_text, _ = return_contours_of_image(image_box_tabels_and_m_text)
                    image_box_tabels = np.repeat(image_box_tabels_1[:, :, np.newaxis], 3, axis=2)

                    image_box_tabels = image_box_tabels.astype(np.uint8)
                    imgray = cv2.cvtColor(image_box_tabels, cv2.COLOR_BGR2GRAY)
                    ret, thresh = cv2.threshold(imgray, 0, 255, 0)

                    contours_line, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

                    y_min_main_line, y_max_main_line = find_features_of_contours(contours_line)
                    y_min_main_tab, y_max_main_tab = find_features_of_contours(contours_tab)

                    cx_tab_m_text, cy_tab_m_text, x_min_tab_m_text, x_max_tab_m_text, \
                        y_min_tab_m_text, y_max_tab_m_text, _ = find_new_features_of_contours(contours_table_m_text)
                    cx_tabl, cy_tabl, x_min_tabl, x_max_tabl, y_min_tabl, y_max_tabl, _ = \
                        find_new_features_of_contours(contours_tab)

                    if len(y_min_main_tab) > 0:
                        y_down_tabs = []
                        y_up_tabs = []

                        for i_t in range(len(y_min_main_tab)):
                            y_down_tab = []
                            y_up_tab = []
                            for i_l in range(len(y_min_main_line)):
                                if (y_min_main_tab[i_t] > y_min_main_line[i_l] and
                                    y_max_main_tab[i_t] > y_min_main_line[i_l] and
                                    y_min_main_tab[i_t] > y_max_main_line[i_l] and
                                    y_max_main_tab[i_t] > y_min_main_line[i_l]):
                                    pass
                                elif (y_min_main_tab[i_t] < y_max_main_line[i_l] and
                                      y_max_main_tab[i_t] < y_max_main_line[i_l] and
                                      y_max_main_tab[i_t] < y_min_main_line[i_l] and
                                      y_min_main_tab[i_t] < y_min_main_line[i_l]):
                                    pass
                                elif np.abs(y_max_main_line[i_l] - y_min_main_line[i_l]) < 100:
                                    pass
                                else:
                                    y_up_tab.append(np.min([y_min_main_line[i_l], y_min_main_tab[i_t]]))
                                    y_down_tab.append(np.max([y_max_main_line[i_l], y_max_main_tab[i_t]]))

                            if len(y_up_tab) == 0:
                                y_up_tabs.append(y_min_main_tab[i_t])
                                y_down_tabs.append(y_max_main_tab[i_t])
                            else:
                                y_up_tabs.append(np.min(y_up_tab))
                                y_down_tabs.append(np.max(y_down_tab))
                    else:
                        y_down_tabs = []
                        y_up_tabs = []
                except:
                    y_down_tabs = []
                    y_up_tabs = []

                for ii in range(len(y_up_tabs)):
                    image_box[y_up_tabs[ii]:y_down_tabs[ii], :, 0] = pixel_table

                image_revised_last[box_ys, box_xs] = image_box
        else:
            image_revised_last = np.zeros((image_regions_eraly_p.shape[0], image_regions_eraly_p.shape[1], 3))
            for i in range(len(boxes)):
                box_ys = slice(*boxes[i][2:4])
                box_xs = slice(*boxes[i][0:2])
                image_box = img_comm[box_ys, box_xs]
                image_revised_last[box_ys, box_xs] = image_box

        if num_col_classifier == 1:
            img_tables_col_1 = (image_revised_last[:,:,0] == pixel_table).astype(np.uint8)
            contours_table_col1, _ = return_contours_of_image(img_tables_col_1)

            _, _, _, _, y_min_tab_col1, y_max_tab_col1, _ = find_new_features_of_contours(contours_table_col1)

            if len(y_min_tab_col1) > 0:
                for ijv in range(len(y_min_tab_col1)):
                    image_revised_last[int(y_min_tab_col1[ijv]):int(y_max_tab_col1[ijv]), :, :] = pixel_table
        return image_revised_last

    def do_order_of_regions(self, *args, **kwargs):
        if self.full_layout:
            return self.do_order_of_regions_full_layout(*args, **kwargs)
        return self.do_order_of_regions_no_full_layout(*args, **kwargs)

    def get_tables_from_model(self, img, num_col_classifier):
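        """Predict table regions with the table model.

        In the light version a single prediction is returned; otherwise the
        prediction is combined with one on the 180°-rotated image and, for one
        or two columns, on a width-padded copy, followed by erosion/dilation
        cleanup.
        """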
        img_org = np.copy(img)
        img_height_h = img_org.shape[0]
        img_width_h = img_org.shape[1]
        patches = False
        if self.light_version:
            prediction_table, _ = self.do_prediction_new_concept(patches, img, self.model_table)
            prediction_table = prediction_table.astype(np.int16)
            return prediction_table[:,:,0]
        else:
            if num_col_classifier < 4 and num_col_classifier > 2:
                prediction_table = self.do_prediction(patches, img, self.model_table)
                pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_table)
                pre_updown = cv2.flip(pre_updown, -1)

                prediction_table[:,:,0][pre_updown[:,:,0] == 1] = 1
                prediction_table = prediction_table.astype(np.int16)

            elif num_col_classifier == 2:
                height_ext = 0  # img.shape[0] // 4
                h_start = height_ext // 2
                width_ext = img.shape[1] // 8
                w_start = width_ext // 2

                img_new = np.zeros((img.shape[0] + height_ext,
                                    img.shape[1] + width_ext,
                                    img.shape[2])).astype(float)
                ys = slice(h_start, h_start + img.shape[0])
                xs = slice(w_start, w_start + img.shape[1])
                img_new[ys, xs] = img

                prediction_ext = self.do_prediction(patches, img_new, self.model_table)
                pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_table)
                pre_updown = cv2.flip(pre_updown, -1)

                prediction_table = prediction_ext[ys, xs]
                prediction_table_updown = pre_updown[ys, xs]

                prediction_table[:,:,0][prediction_table_updown[:,:,0] == 1] = 1
                prediction_table = prediction_table.astype(np.int16)
            elif num_col_classifier == 1:
                height_ext = 0  # img.shape[0] // 4
                h_start = height_ext // 2
                width_ext = img.shape[1] // 4
                w_start = width_ext // 2

                img_new = np.zeros((img.shape[0] + height_ext,
                                    img.shape[1] + width_ext,
                                    img.shape[2])).astype(float)
                ys = slice(h_start, h_start + img.shape[0])
                xs = slice(w_start, w_start + img.shape[1])
                img_new[ys, xs] = img

                prediction_ext = self.do_prediction(patches, img_new, self.model_table)
                pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_table)
                pre_updown = cv2.flip(pre_updown, -1)

                prediction_table = prediction_ext[ys, xs]
                prediction_table_updown = pre_updown[ys, xs]

                prediction_table[:,:,0][prediction_table_updown[:,:,0] == 1] = 1
                prediction_table = prediction_table.astype(np.int16)
            else:
                prediction_table = np.zeros(img.shape)
                img_w_half = img.shape[1] // 2

                pre1 = self.do_prediction(patches, img[:, 0:img_w_half, :], self.model_table)
                pre2 = self.do_prediction(patches, img[:, img_w_half:, :], self.model_table)
                pre_full = self.do_prediction(patches, img[:,:,:], self.model_table)
                pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_table)
                pre_updown = cv2.flip(pre_updown, -1)

                prediction_table_full_erode = cv2.erode(pre_full[:,:,0], KERNEL, iterations=4)
                prediction_table_full_erode = cv2.dilate(prediction_table_full_erode, KERNEL, iterations=4)

                prediction_table_full_updown_erode = cv2.erode(pre_updown[:,:,0], KERNEL, iterations=4)
                prediction_table_full_updown_erode = cv2.dilate(prediction_table_full_updown_erode, KERNEL, iterations=4)

                prediction_table[:, 0:img_w_half, :] = pre1[:,:,:]
                prediction_table[:, img_w_half:, :] = pre2[:,:,:]

                prediction_table[:,:,0][prediction_table_full_erode[:,:] == 1] = 1
                prediction_table[:,:,0][prediction_table_full_updown_erode[:,:] == 1] = 1
                prediction_table = prediction_table.astype(np.int16)

            #prediction_table_erode = cv2.erode(prediction_table[:,:,0], self.kernel, iterations=6)
            #prediction_table_erode = cv2.dilate(prediction_table_erode, self.kernel, iterations=6)

            prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20)
            prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20)
            return prediction_table_erode.astype(np.int16)

    def run_graphics_and_columns_light(
            self, text_regions_p_1, textline_mask_tot_ea,
            num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light):
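        """Crop the region and textline predictions to the detected page, build
        image/separator masks, and try to determine the number of columns
        (light version).
        """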
        #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics')
        #print(erosion_hurts, 'erosion_hurts')
        t_in_gr = time.time()
        img_g = self.imread(grayscale=True, uint8=True)

        img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3))
        img_g3 = img_g3.astype(np.uint8)
        img_g3[:, :, 0] = img_g[:, :]
        img_g3[:, :, 1] = img_g[:, :]
        img_g3[:, :, 2] = img_g[:, :]

        image_page, page_coord, cont_page = self.extract_page()
        #print("inside graphics 1 ", time.time() - t_in_gr)
        if self.tables:
            table_prediction = self.get_tables_from_model(image_page, num_col_classifier)
        else:
            table_prediction = np.zeros((image_page.shape[0], image_page.shape[1])).astype(np.int16)

        if self.plotter:
            self.plotter.save_page_image(image_page)

        text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
        textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
        img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]

        mask_images = (text_regions_p_1[:, :] == 2) * 1
        mask_images = mask_images.astype(np.uint8)
        mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10)
        mask_lines = (text_regions_p_1[:, :] == 3) * 1
        mask_lines = mask_lines.astype(np.uint8)
        img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1
        img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8)

        #print("inside graphics 2 ", time.time() - t_in_gr)
        if erosion_hurts:
            img_only_regions = np.copy(img_only_regions_with_sep[:,:])
        else:
            img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6)

        ##print(img_only_regions.shape,'img_only_regions')
        ##plt.imshow(img_only_regions[:,:])
        ##plt.show()
        ##num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
        try:
            num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
            num_col = num_col + 1
            if not num_column_is_classified:
                num_col_classifier = num_col + 1
        except Exception as why:
            self.logger.error(why)
            num_col = None
        #print("inside graphics 3 ", time.time() - t_in_gr)
        return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines,
                text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light)

    def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, img_bin_light):
        t_in_gr = time.time()
        img_g = self.imread(grayscale=True, uint8=True)

        img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3))
        img_g3 = img_g3.astype(np.uint8)
        img_g3[:, :, 0] = img_g[:, :]
        img_g3[:, :, 1] = img_g[:, :]
        img_g3[:, :, 2] = img_g[:, :]

        image_page, page_coord, cont_page = self.extract_page()
        #print("inside graphics 1 ", time.time() - t_in_gr)

        textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
        img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]

        return page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page

    def run_graphics_and_columns(
            self, text_regions_p_1,
            num_col_classifier, num_column_is_classified, erosion_hurts):

        t_in_gr = time.time()
        img_g = self.imread(grayscale=True, uint8=True)

        img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3))
        img_g3 = img_g3.astype(np.uint8)
        img_g3[:, :, 0] = img_g[:, :]
        img_g3[:, :, 1] = img_g[:, :]
        img_g3[:, :, 2] = img_g[:, :]

        image_page, page_coord, cont_page = self.extract_page()

        if self.tables:
            table_prediction = self.get_tables_from_model(image_page, num_col_classifier)
        else:
            table_prediction = np.zeros((image_page.shape[0], image_page.shape[1])).astype(np.int16)

        if self.plotter:
            self.plotter.save_page_image(image_page)

        text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
        mask_images = (text_regions_p_1[:, :] == 2) * 1
        mask_images = mask_images.astype(np.uint8)
        mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10)
        mask_lines = (text_regions_p_1[:, :] == 3) * 1
        mask_lines = mask_lines.astype(np.uint8)
        img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1
        img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8)

        if erosion_hurts:
            img_only_regions = np.copy(img_only_regions_with_sep[:,:])
        else:
            img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6)
        try:
            num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
            num_col = num_col + 1
            if not num_column_is_classified:
                num_col_classifier = num_col + 1
        except Exception as why:
            self.logger.error(why)
            num_col = None
        return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines,
                text_regions_p_1, cont_page, table_prediction)

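    # run_enhancement lets the column classifier drive resizing: depending on
    # allow_enhancement/allow_scaling, either the enhanced image or the
    # original is used for all subsequent processing.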
    def run_enhancement(self, light_version):
        t_in = time.time()
        self.logger.info("Resizing and enhancing image...")
        is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = \
            self.resize_and_enhance_image_with_column_classifier(light_version)
        self.logger.info("Image was %senhanced.", '' if is_image_enhanced else 'not ')
        scale = 1
        if is_image_enhanced:
            if self.allow_enhancement:
                #img_res = img_res.astype(np.uint8)
                self.get_image_and_scales(img_org, img_res, scale)
                if self.plotter:
                    self.plotter.save_enhanced_image(img_res)
            else:
                self.get_image_and_scales_after_enhancing(img_org, img_res)
        else:
            self.get_image_and_scales(img_org, img_res, scale)
            if self.allow_scaling:
                img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, img_bin)
                self.get_image_and_scales_after_enhancing(img_org, img_res)
        #print("enhancement in ", time.time()-t_in)
        return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified

    def run_textline(self, image_page, num_col_classifier=None):
        scaler_h_textline = 1  # 1.3 # 1.2
        scaler_w_textline = 1  # 1.3 # 0.9 # 1
        #print(image_page.shape)
        textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier)
        if self.textline_light:
            textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16)

        if self.plotter:
            self.plotter.save_plot_of_textlines(textline_mask_tot_ea, image_page)
        return textline_mask_tot_ea

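    # Deskewing estimates the global page rotation from the eroded textline
    # mask; the angle search is parallelized via self.executor.map.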
    def run_deskew(self, textline_mask_tot_ea):
        #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew')
        slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), 2, 30, True,
                                          map=self.executor.map, logger=self.logger, plotter=self.plotter)
        slope_first = 0

        if self.plotter:
            self.plotter.save_deskewed_image(slope_deskew)
        self.logger.info("slope_deskew: %.2f°", slope_deskew)
        return slope_deskew, slope_first

    def run_marginals(
            self, image_page, textline_mask_tot_ea, mask_images, mask_lines,
            num_col_classifier, slope_deskew, text_regions_p_1, table_prediction):

        image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :]
        textline_mask_tot[mask_images[:, :] == 1] = 0

        text_regions_p_1[mask_lines[:, :] == 1] = 3
        text_regions_p = text_regions_p_1[:, :]
        text_regions_p = np.array(text_regions_p)

        if num_col_classifier in (1, 2):
            try:
                regions_without_separators = (text_regions_p[:, :] == 1) * 1
                if self.tables:
                    regions_without_separators[table_prediction == 1] = 1
                regions_without_separators = regions_without_separators.astype(np.uint8)
                text_regions_p = get_marginals(
                    rotate_image(regions_without_separators, slope_deskew), text_regions_p,
                    num_col_classifier, slope_deskew, light_version=self.light_version, kernel=KERNEL)
            except Exception as e:
                self.logger.error("exception %s", e)

        if self.plotter:
            self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page)
            self.plotter.save_plot_of_layout_main(text_regions_p, image_page)
        return textline_mask_tot, text_regions_p, image_page_rotated

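    # Reading-order boxes without full layout: if the page is skewed beyond
    # SLOPE_THRESHOLD, all masks are recomputed on the deskewed image (the
    # "_d" variants) and table candidates are projected back by inverse
    # rotation before being merged into text_regions_p.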
    def run_boxes_no_full_layout(
            self, image_page, textline_mask_tot, text_regions_p,
            slope_deskew, num_col_classifier, table_prediction, erosion_hurts):

        self.logger.debug('enter run_boxes_no_full_layout')
        t_0_box = time.time()
        if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
            _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func(
                image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)
            text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1])
            textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1])
            table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], text_regions_p.shape[1])
            regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1
            if self.tables:
                regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
        regions_without_separators = (text_regions_p[:, :] == 1) * 1  # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions)
        #print(time.time()-t_0_box,'time box in 1')
        if self.tables:
            regions_without_separators[table_prediction == 1] = 1
        if np.abs(slope_deskew) < SLOPE_THRESHOLD:
            text_regions_p_1_n = None
            textline_mask_tot_d = None
            regions_without_separators_d = None
        pixel_lines = 3
        if np.abs(slope_deskew) < SLOPE_THRESHOLD:
            _, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(
                np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2),
                num_col_classifier, self.tables, pixel_lines)

        if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
            _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
                np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),
                num_col_classifier, self.tables, pixel_lines)
        #print(time.time()-t_0_box,'time box in 2')
        self.logger.info("num_col_classifier: %s", num_col_classifier)

        if num_col_classifier >= 3:
            if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                regions_without_separators = regions_without_separators.astype(np.uint8)
                regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6)
            else:
                regions_without_separators_d = regions_without_separators_d.astype(np.uint8)
                regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)
        #print(time.time()-t_0_box,'time box in 3')
        t1 = time.time()
        if np.abs(slope_deskew) < SLOPE_THRESHOLD:
            boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(
                splitter_y_new, regions_without_separators, matrix_of_lines_ch,
                num_col_classifier, erosion_hurts, self.tables, self.right2left)
            boxes_d = None
            self.logger.debug("len(boxes): %s", len(boxes))
            #print(time.time()-t_0_box,'time box in 3.1')

            if self.tables:
                if self.light_version:
                    pass
                else:
                    text_regions_p_tables = np.copy(text_regions_p)
                    text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10
                    pixel_line = 3
                    img_revised_tab2 = self.add_tables_heuristic_to_layout(
                        text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables,
                        num_col_classifier, 0.000005, pixel_line)
                    #print(time.time()-t_0_box,'time box in 3.2')
                    img_revised_tab2, _ = self.check_iou_of_bounding_box_and_contour_for_tables(
                        img_revised_tab2, table_prediction, 10, num_col_classifier)
                    #print(time.time()-t_0_box,'time box in 3.3')
        else:
            boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(
                splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d,
                num_col_classifier, erosion_hurts, self.tables, self.right2left)
            boxes = None
            self.logger.debug("len(boxes): %s", len(boxes_d))

            if self.tables:
                if self.light_version:
                    pass
                else:
                    text_regions_p_tables = np.copy(text_regions_p_1_n)
                    text_regions_p_tables = np.round(text_regions_p_tables)
                    text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10

                    pixel_line = 3
                    img_revised_tab2 = self.add_tables_heuristic_to_layout(
                        text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables,
                        num_col_classifier, 0.000005, pixel_line)
                    img_revised_tab2_d, _ = self.check_iou_of_bounding_box_and_contour_for_tables(
                        img_revised_tab2, table_prediction_n, 10, num_col_classifier)

                    img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew)
                    img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated)
                    img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8)
                    img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1])
        #print(time.time()-t_0_box,'time box in 4')
        self.logger.info("detecting boxes took %.1fs", time.time() - t1)

        if self.tables:
            if self.light_version:
                text_regions_p[:,:][table_prediction[:,:] == 1] = 10
                img_revised_tab = text_regions_p[:,:]
            else:
                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                    img_revised_tab = np.copy(img_revised_tab2[:,:,0])
                    img_revised_tab[:,:][(text_regions_p[:,:] == 1) & (img_revised_tab[:,:] != 10)] = 1
                else:
                    img_revised_tab = np.copy(text_regions_p[:,:])
                    img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0
                    img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10

                text_regions_p[:,:][text_regions_p[:,:] == 10] = 0
                text_regions_p[:,:][img_revised_tab[:,:] == 10] = 10
        else:
            img_revised_tab = text_regions_p[:,:]
        #img_revised_tab = text_regions_p[:, :]
        if self.light_version:
            polygons_of_images = return_contours_of_interested_region(text_regions_p, 2)
        else:
            polygons_of_images = return_contours_of_interested_region(img_revised_tab, 2)

        pixel_img = 4
        min_area_mar = 0.00001
        if self.light_version:
            marginal_mask = (text_regions_p[:,:] == pixel_img) * 1
            marginal_mask = marginal_mask.astype('uint8')
            marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2)

            polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar)
        else:
            polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar)

        pixel_img = 10
        contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar)
        #print(time.time()-t_0_box,'time box in 5')
        self.logger.debug('exit run_boxes_no_full_layout')
        return (polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d,
                regions_without_separators_d, boxes, boxes_d,
                polygons_of_marginals, contours_tables)

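    # Full-layout variant: besides the boxes above, a second model pass
    # (extract_text_regions_new) adds headers, drop capitals and separators.
    # Drop-capital pixels are first folded into the main text class, then
    # re-applied (eroded) and finally written into text_regions_p as label 4.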
    def run_boxes_full_layout(
            self, image_page, textline_mask_tot, text_regions_p,
            slope_deskew, num_col_classifier, img_only_regions,
            table_prediction, erosion_hurts, img_bin_light):

        self.logger.debug('enter run_boxes_full_layout')
        t_full0 = time.time()
        if self.tables:
            if self.light_version:
                text_regions_p[:,:][table_prediction[:,:] == 1] = 10
                img_revised_tab = text_regions_p[:,:]
                if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
                    image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
                        rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)

                    text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1])
                    textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1])
                    table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], text_regions_p.shape[1])

                    regions_without_separators_d = (text_regions_p_1_n[:,:] == 1) * 1
                    regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
                else:
                    text_regions_p_1_n = None
                    textline_mask_tot_d = None
                    regions_without_separators_d = None
                # regions_without_separators = ( text_regions_p[:,:]==1 | text_regions_p[:,:]==2 )*1
                #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions)
                regions_without_separators = (text_regions_p[:,:] == 1) * 1
                regions_without_separators[table_prediction == 1] = 1

            else:
                if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
                    image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \
                        rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)

                    text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1])
                    textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1])
                    table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], text_regions_p.shape[1])

                    regions_without_separators_d = (text_regions_p_1_n[:,:] == 1) * 1
                    regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
                else:
                    text_regions_p_1_n = None
                    textline_mask_tot_d = None
                    regions_without_separators_d = None

                # regions_without_separators = ( text_regions_p[:,:]==1 | text_regions_p[:,:]==2 )*1
                #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions)
                regions_without_separators = (text_regions_p[:,:] == 1) * 1
                regions_without_separators[table_prediction == 1] = 1

                pixel_lines = 3
                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                    num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(
                        np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2),
                        num_col_classifier, self.tables, pixel_lines)

                if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
                    num_col_d, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
                        np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),
                        num_col_classifier, self.tables, pixel_lines)

                if num_col_classifier >= 3:
                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                        regions_without_separators = regions_without_separators.astype(np.uint8)
                        regions_without_separators = cv2.erode(regions_without_separators[:,:], KERNEL, iterations=6)

                    if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
                        regions_without_separators_d = regions_without_separators_d.astype(np.uint8)
                        regions_without_separators_d = cv2.erode(regions_without_separators_d[:,:], KERNEL, iterations=6)

                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                    boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(
                        splitter_y_new, regions_without_separators, matrix_of_lines_ch,
                        num_col_classifier, erosion_hurts, self.tables, self.right2left)
                    text_regions_p_tables = np.copy(text_regions_p)
                    text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10
                    pixel_line = 3
                    img_revised_tab2 = self.add_tables_heuristic_to_layout(
                        text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables,
                        num_col_classifier, 0.000005, pixel_line)

                    img_revised_tab2, _ = self.check_iou_of_bounding_box_and_contour_for_tables(
                        img_revised_tab2, table_prediction, 10, num_col_classifier)
                else:
                    boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(
                        splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d,
                        num_col_classifier, erosion_hurts, self.tables, self.right2left)
                    text_regions_p_tables = np.copy(text_regions_p_1_n)
                    text_regions_p_tables = np.round(text_regions_p_tables)
                    text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10

                    pixel_line = 3
                    img_revised_tab2 = self.add_tables_heuristic_to_layout(
                        text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables,
                        num_col_classifier, 0.000005, pixel_line)

                    img_revised_tab2_d, _ = self.check_iou_of_bounding_box_and_contour_for_tables(
                        img_revised_tab2, table_prediction_n, 10, num_col_classifier)
                    img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew)

                    img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated)
                    img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8)

                    img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1])

                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                    img_revised_tab = np.copy(img_revised_tab2[:,:,0])
                else:
                    img_revised_tab = np.copy(text_regions_p[:,:])
                    img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0
                    img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10

                ##img_revised_tab = img_revised_tab2[:,:,0]
                #img_revised_tab = text_regions_p[:,:]
                text_regions_p[:,:][text_regions_p[:,:] == 10] = 0
                text_regions_p[:,:][img_revised_tab[:,:] == 10] = 10
                #img_revised_tab[img_revised_tab2[:,:,0]==10] = 10

        pixel_img = 4
        min_area_mar = 0.00001

        if self.light_version:
            marginal_mask = (text_regions_p[:,:] == pixel_img) * 1
            marginal_mask = marginal_mask.astype('uint8')
            marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2)

            polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar)
        else:
            polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar)

        pixel_img = 10
        contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar)

        # set first model with second model
        text_regions_p[:, :][text_regions_p[:, :] == 2] = 5
        text_regions_p[:, :][text_regions_p[:, :] == 3] = 6
        text_regions_p[:, :][text_regions_p[:, :] == 4] = 8

        image_page = image_page.astype(np.uint8)
        #print("full inside 1", time.time()- t_full0)
        regions_fully, regions_fully_only_drop = self.extract_text_regions_new(
            img_bin_light if self.light_version else image_page,
            False, cols=num_col_classifier)
        #print("full inside 2", time.time()- t_full0)
        # 6 is the separator label in the old full-layout model
        # 4 is the drop-capital class in the old full-layout model
        # in the new full-layout model, drop capital is 3 and separator is 5
        text_regions_p[:,:][regions_fully[:,:,0] == 5] = 6
        ###regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4

        #text_regions_p[:,:][regions_fully[:,:,0]==6]=6
        ##regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p)
        ##regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4
        drop_capital_label_in_full_layout_model = 3

        drops = (regions_fully[:,:,0] == drop_capital_label_in_full_layout_model) * 1
        drops = drops.astype(np.uint8)

        regions_fully[:,:,0][regions_fully[:,:,0] == drop_capital_label_in_full_layout_model] = 1

        drops = cv2.erode(drops[:,:], KERNEL, iterations=1)
        regions_fully[:,:,0][drops[:,:] == 1] = drop_capital_label_in_full_layout_model

        regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(
            regions_fully, drop_capital_label_in_full_layout_model, text_regions_p)
        ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier)
        ##if num_col_classifier > 2:
            ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0
        ##else:
            ##regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p)

        ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions)
        # plt.imshow(regions_fully[:,:,0])
        # plt.show()
        text_regions_p[:, :][regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model] = 4
        ####text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4
        #plt.imshow(text_regions_p)
        #plt.show()
        ####if not self.tables:
        if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
            _, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout(
                image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew)

            text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1])
            textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1])
            regions_fully_n = resize_image(regions_fully_n, text_regions_p.shape[0], text_regions_p.shape[1])
            if not self.tables:
                regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1
        else:
            text_regions_p_1_n = None
            textline_mask_tot_d = None
            regions_without_separators_d = None
        if not self.tables:
            regions_without_separators = (text_regions_p[:, :] == 1) * 1
            img_revised_tab = np.copy(text_regions_p[:, :])
        polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5)

        self.logger.debug('exit run_boxes_full_layout')
        #print("full inside 3", time.time()- t_full0)
        return (polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d,
                regions_without_separators_d, regions_fully, regions_without_separators,
                polygons_of_marginals, contours_tables)

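    # Models are loaded preferring the SavedModel directory over the .h5
    # file; the custom_objects fallback covers models that use the
    # PatchEncoder/Patches layers.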
    @staticmethod
    def our_load_model(model_file):
        if model_file.endswith('.h5') and Path(model_file[:-3]).exists():
            # prefer SavedModel over HDF5 format if it exists
            model_file = model_file[:-3]
        try:
            model = load_model(model_file, compile=False)
        except Exception:
            model = load_model(model_file, compile=False, custom_objects={
                "PatchEncoder": PatchEncoder, "Patches": Patches})
        return model

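    # Reading order with a trained model: each candidate region pair is
    # rendered into a 3-channel input (the two region masks plus the layout
    # map), and the model scores whether region j comes after region i.
    # Around each pivot i the remaining regions are split into "before" and
    # "after" sublists; the loop repeats until every sublist holds a single
    # region, which yields the final order.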
    def do_order_of_regions_with_model(self, contours_only_text_parent, contours_only_text_parent_h, text_regions_p):
        y_len = text_regions_p.shape[0]
        x_len = text_regions_p.shape[1]

        img_poly = np.zeros((y_len, x_len), dtype='uint8')
        img_poly[text_regions_p[:,:] == 1] = 1
        img_poly[text_regions_p[:,:] == 2] = 2
        img_poly[text_regions_p[:,:] == 3] = 4
        img_poly[text_regions_p[:,:] == 6] = 5

        ###temp
        ##sep_mask = (img_poly==5)*1
        ##sep_mask = sep_mask.astype('uint8')
        ##sep_mask = cv2.erode(sep_mask, kernel=KERNEL, iterations=2)
        ##img_poly[img_poly==5] = 0
        ##img_poly[sep_mask==1] = 5
        ###

        img_header_and_sep = np.zeros((y_len, x_len), dtype='uint8')
        if contours_only_text_parent_h:
            _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(
                contours_only_text_parent_h)
            for j in range(len(cy_main)):
                img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,
                                   int(x_min_main[j]):int(x_max_main[j])] = 1
            co_text_all = contours_only_text_parent + contours_only_text_parent_h
        else:
            co_text_all = contours_only_text_parent

        if not len(co_text_all):
            return [], []

        labels_con = np.zeros((int(y_len/6.), int(x_len/6.), len(co_text_all)), dtype=bool)
        co_text_all = [(i/6).astype(int) for i in co_text_all]
        for i in range(len(co_text_all)):
            img = labels_con[:,:,i].astype(np.uint8)

            #img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)

            cv2.fillPoly(img, pts=[co_text_all[i]], color=(1,))
            labels_con[:,:,i] = img

        height1 = 672  # 448
        width1 = 448  # 224

        height2 = 672  # 448
        width2 = 448  # 224

        height3 = 672  # 448
        width3 = 448  # 224

        labels_con = resize_image(labels_con.astype(np.uint8), height1, width1).astype(bool)
        img_header_and_sep = resize_image(img_header_and_sep, height1, width1)
        img_poly = resize_image(img_poly, height3, width3)

        inference_bs = 3
        input_1 = np.zeros((inference_bs, height1, width1, 3))
        ordered = [list(range(len(co_text_all)))]
        index_update = 0
        #print(labels_con.shape[2],"number of regions for reading order")
        while index_update >= 0:
            ij_list = ordered.pop(index_update)
            i = ij_list.pop(0)

            ante_list = []
            post_list = []
            tot_counter = 0
            batch = []
            for j in ij_list:
                img1 = labels_con[:,:,i].astype(float)
                img2 = labels_con[:,:,j].astype(float)
                img1[img_poly==5] = 2
                img2[img_poly==5] = 2
                img1[img_header_and_sep==1] = 3
                img2[img_header_and_sep==1] = 3

                input_1[len(batch), :, :, 0] = img1 / 3.
                input_1[len(batch), :, :, 2] = img2 / 3.
                input_1[len(batch), :, :, 1] = img_poly / 5.

                tot_counter += 1
                batch.append(j)
                if tot_counter % inference_bs == 0 or tot_counter == len(ij_list):
                    y_pr = self.model_reading_order.predict(input_1, verbose=0)
                    for jb, j in enumerate(batch):
                        if y_pr[jb][0] >= 0.5:
                            post_list.append(j)
                        else:
                            ante_list.append(j)
                    batch = []

            if len(ante_list):
                ordered.insert(index_update, ante_list)
                index_update += 1
            ordered.insert(index_update, [i])
            if len(post_list):
                ordered.insert(index_update + 1, post_list)

            index_update = -1
            for index_next, ij_list in enumerate(ordered):
                if len(ij_list) > 1:
                    index_update = index_next
                    break

        ordered = [i[0] for i in ordered]
        region_ids = ['region_%04d' % i for i in range(len(co_text_all))]
        return ordered, region_ids

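    # The following helpers locate split points in a wide textline image from
    # its vertical projection profile: column-wise pixel sums are smoothed
    # with a Gaussian, and strong peaks near the line centre mark background
    # gaps that are safe to cut at.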
    def return_start_and_end_of_common_text_of_textline_ocr(self, textline_image, ind_tot):
        width = np.shape(textline_image)[1]
        height = np.shape(textline_image)[0]
        common_window = int(0.2 * width)

        width1 = int(width/2. - common_window)
        width2 = int(width/2. + common_window)

        img_sum = np.sum(textline_image[:,:,0], axis=0)
        sum_smoothed = gaussian_filter1d(img_sum, 3)

        peaks_real, _ = find_peaks(sum_smoothed, height=0)
        if len(peaks_real) > 70:
            #print(len(peaks_real), 'len(peaks_real)')

            peaks_real = peaks_real[(peaks_real < width2) & (peaks_real > width1)]

            arg_sort = np.argsort(sum_smoothed[peaks_real])
            arg_sort4 = arg_sort[::-1][:4]
            peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
            argsort_sorted = np.argsort(peaks_sort_4)

            first_4_sorted = peaks_sort_4[argsort_sorted]
            y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
            #print(first_4_sorted,'first_4_sorted')

            arg_sortnew = np.argsort(y_4_sorted)
            peaks_final = np.sort(first_4_sorted[arg_sortnew][2:])

            #plt.figure(ind_tot)
            #plt.imshow(textline_image)
            #plt.plot([peaks_final[0], peaks_final[0]], [0, height-1])
            #plt.plot([peaks_final[1], peaks_final[1]], [0, height-1])
            #plt.savefig('./'+str(ind_tot)+'.png')

            return peaks_final[0], peaks_final[1]
        else:
            return None

    def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image, ind_tot):
        width = np.shape(textline_image)[1]
        height = np.shape(textline_image)[0]
        common_window = int(0.06 * width)

        width1 = int(width/2. - common_window)
        width2 = int(width/2. + common_window)

        img_sum = np.sum(textline_image[:,:,0], axis=0)
        sum_smoothed = gaussian_filter1d(img_sum, 3)

        peaks_real, _ = find_peaks(sum_smoothed, height=0)
        if len(peaks_real) > 70:
            #print(len(peaks_real), 'len(peaks_real)')

            peaks_real = peaks_real[(peaks_real < width2) & (peaks_real > width1)]

            arg_max = np.argmax(sum_smoothed[peaks_real])
            peaks_final = peaks_real[arg_max]

            #plt.figure(ind_tot)
            #plt.imshow(textline_image)
            #plt.plot([peaks_final, peaks_final], [0, height-1])
            ##plt.plot([peaks_final[1], peaks_final[1]], [0, height-1])
            #plt.savefig('./'+str(ind_tot)+'.png')

            return peaks_final
        else:
            return None

    def return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
            self, peaks_real, sum_smoothed, start_split, end_split):

        peaks_real = peaks_real[(peaks_real < end_split) & (peaks_real > start_split)]

        arg_sort = np.argsort(sum_smoothed[peaks_real])
        arg_sort4 = arg_sort[::-1][:4]
        peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
        argsort_sorted = np.argsort(peaks_sort_4)

        first_4_sorted = peaks_sort_4[argsort_sorted]
        y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
        #print(first_4_sorted,'first_4_sorted')

        arg_sortnew = np.argsort(y_4_sorted)
        peaks_final = np.sort(first_4_sorted[arg_sortnew][3:])
        return peaks_final[0]

    def return_start_and_end_of_common_text_of_textline_ocr_new(self, textline_image, ind_tot):
        width = np.shape(textline_image)[1]
        height = np.shape(textline_image)[0]
        common_window = int(0.15 * width)

        width1 = int(width/2. - common_window)
        width2 = int(width/2. + common_window)
        mid = int(width/2.)

        img_sum = np.sum(textline_image[:,:,0], axis=0)
        sum_smoothed = gaussian_filter1d(img_sum, 3)

        peaks_real, _ = find_peaks(sum_smoothed, height=0)
        if len(peaks_real) > 70:
            peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
                peaks_real, sum_smoothed, width1, mid+2)
            peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
                peaks_real, sum_smoothed, mid-2, width2)

            #plt.figure(ind_tot)
            #plt.imshow(textline_image)
            #plt.plot([peak_start, peak_start], [0, height-1])
            #plt.plot([peak_end, peak_end], [0, height-1])
            #plt.savefig('./'+str(ind_tot)+'.png')

            return peak_start, peak_end
        else:
            return None

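    # OCR of single textlines with a transformer OCR model (processor plus
    # encoder-decoder): lines with a high height-to-width ratio are decoded
    # in one pass; very wide lines are split at a background peak and the
    # halves are decoded together, then the texts are joined.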
    def return_ocr_of_textline_without_common_section(
            self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio, ind_tot):

        if h2w_ratio > 0.05:
            pixel_values = processor(textline_image, return_tensors="pt").pixel_values
            generated_ids = model_ocr.generate(pixel_values.to(device))
            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        else:
            #width = np.shape(textline_image)[1]
            #height = np.shape(textline_image)[0]
            #common_window = int(0.3*width)
            #width1 = int ( width/2. - common_window )
            #width2 = int ( width/2. + common_window )

            split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section(
                textline_image, ind_tot)
            if split_point:
                image1 = textline_image[:, :split_point, :]  # image.crop((0, 0, width2, height))
                image2 = textline_image[:, split_point:, :]  # image.crop((width1, 0, width, height))

                #pixel_values1 = processor(image1, return_tensors="pt").pixel_values
                #pixel_values2 = processor(image2, return_tensors="pt").pixel_values

                pixel_values_merged = processor([image1, image2], return_tensors="pt").pixel_values
                generated_ids_merged = model_ocr.generate(pixel_values_merged.to(device))
                generated_text_merged = processor.batch_decode(generated_ids_merged, skip_special_tokens=True)

                #print(generated_text_merged,'generated_text_merged')

                #generated_ids1 = model_ocr.generate(pixel_values1.to(device))
                #generated_ids2 = model_ocr.generate(pixel_values2.to(device))

                #generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0]
                #generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0]

                #generated_text = generated_text1 + ' ' + generated_text2
                generated_text = generated_text_merged[0] + ' ' + generated_text_merged[1]

                #print(generated_text1,'generated_text1')
                #print(generated_text2, 'generated_text2')
                #print('########################################')
            else:
                pixel_values = processor(textline_image, return_tensors="pt").pixel_values
                generated_ids = model_ocr.generate(pixel_values.to(device))
                generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        #print(generated_text,'generated_text')
        #print('########################################')
        return generated_text

    def return_ocr_of_textline(
            self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio, ind_tot):

        if h2w_ratio > 0.05:
            pixel_values = processor(textline_image, return_tensors="pt").pixel_values
            generated_ids = model_ocr.generate(pixel_values.to(device))
            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        else:
            #width = np.shape(textline_image)[1]
            #height = np.shape(textline_image)[0]
            #common_window = int(0.3*width)
            #width1 = int ( width/2. - common_window )
            #width2 = int ( width/2. + common_window )

            try:
                width1, width2 = self.return_start_and_end_of_common_text_of_textline_ocr_new(textline_image, ind_tot)

                image1 = textline_image[:, :width2, :]  # image.crop((0, 0, width2, height))
                image2 = textline_image[:, width1:, :]  # image.crop((width1, 0, width, height))

                pixel_values1 = processor(image1, return_tensors="pt").pixel_values
                pixel_values2 = processor(image2, return_tensors="pt").pixel_values

                generated_ids1 = model_ocr.generate(pixel_values1.to(device))
                generated_ids2 = model_ocr.generate(pixel_values2.to(device))

                generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0]
                generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0]
                #print(generated_text1,'generated_text1')
                #print(generated_text2, 'generated_text2')
                #print('########################################')

                # the two crops overlap; drop the duplicated part by finding
                # the longest common match between the two decoded strings
                match = sq(None, generated_text1, generated_text2).find_longest_match(
                    0, len(generated_text1), 0, len(generated_text2))
                generated_text = generated_text1 + generated_text2[match.b+match.size:]
            except Exception:
                pixel_values = processor(textline_image, return_tensors="pt").pixel_values
                generated_ids = model_ocr.generate(pixel_values.to(device))
                generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        return generated_text

    def return_textline_contour_with_added_box_coordinate(self, textline_contour, box_ind):
        textline_contour[:,0] = textline_contour[:,0] + box_ind[2]
        textline_contour[:,1] = textline_contour[:,1] + box_ind[0]
        return textline_contour

    def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes):
        return [ls_cons[ind] for ind in sorted_indexes]

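    # return_it_in_two_groups smooths a sequence of +1/-1 signs: each run
    # whose summed magnitude is smaller than both neighbouring runs gets its
    # sign flipped, so short sign flips do not fragment the two dominant
    # groups.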
    def return_it_in_two_groups(self, x_differential):
        split = [ind if x_differential[ind] != x_differential[ind+1] else -1
                 for ind in range(len(x_differential)-1)]
        split_masked = list(np.array(split[:])[np.array(split[:]) != -1])
        if 0 not in split_masked:
            split_masked.insert(0, -1)
        split_masked.append(len(x_differential)-1)

        split_masked = np.array(split_masked) + 1

        sums = [np.sum(x_differential[split_masked[ind]:split_masked[ind+1]])
                for ind in range(len(split_masked)-1)]

        indexes_to_bec_changed = [ind if (np.abs(sums[ind-1]) > np.abs(sums[ind]) and
                                          np.abs(sums[ind+1]) > np.abs(sums[ind])) else -1
                                  for ind in range(1, len(sums)-1)]
        indexes_to_bec_changed_filtered = np.array(indexes_to_bec_changed)[np.array(indexes_to_bec_changed) != -1]

        x_differential_new = np.copy(x_differential)
        for i in indexes_to_bec_changed_filtered:
            i_slice = slice(split_masked[i], split_masked[i+1])
            x_differential_new[i_slice] = -1 * np.array(x_differential)[i_slice]

        return x_differential_new

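    # The dilate_* methods below grow contours polygon-point-wise instead of
    # with a morphological kernel: each vertex is shifted perpendicular to
    # its edge (sign taken from the x/y increments), with a step size derived
    # from contour area over extent, and any shifted vertex that would fall
    # back inside the original contour is reverted to the original outline.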
    def dilate_textregions_contours_textline_version(self, all_found_textline_polygons):
        #print(all_found_textline_polygons)
        for j in range(len(all_found_textline_polygons)):
            for ij in range(len(all_found_textline_polygons[j])):
                con_ind = all_found_textline_polygons[j][ij]
                area = cv2.contourArea(con_ind)
                con_ind = con_ind.astype(float)

                x_differential = np.diff(con_ind[:,0,0])
                y_differential = np.diff(con_ind[:,0,1])

                x_differential = gaussian_filter1d(x_differential, 0.1)
                y_differential = gaussian_filter1d(y_differential, 0.1)

                x_min = float(np.min(con_ind[:,0,0]))
                y_min = float(np.min(con_ind[:,0,1]))

                x_max = float(np.max(con_ind[:,0,0]))
                y_max = float(np.max(con_ind[:,0,1]))

                x_differential_mask_nonzeros = [ind/abs(ind) if ind != 0 else ind for ind in x_differential]
                y_differential_mask_nonzeros = [ind/abs(ind) if ind != 0 else ind for ind in y_differential]

                abs_diff = abs(abs(x_differential) - abs(y_differential))

                inc_x = np.zeros(len(x_differential)+1)
                inc_y = np.zeros(len(x_differential)+1)

                if (y_max-y_min) <= (x_max-x_min):
                    dilation_m1 = round(area / (x_max-x_min) * 0.12)
                else:
                    dilation_m1 = round(area / (y_max-y_min) * 0.12)

                if dilation_m1 > 8:
                    dilation_m1 = 8
                if dilation_m1 < 6:
                    dilation_m1 = 6
                #print(dilation_m1, 'dilation_m1')
                dilation_m1 = 6  # NOTE: fixed value overrides the clamped estimate above
                dilation_m2 = int(dilation_m1/2.) + 1

                for i in range(len(x_differential)):
                    if abs_diff[i] == 0:
                        inc_x[i+1] = dilation_m2 * (-1 * y_differential_mask_nonzeros[i])
                        inc_y[i+1] = dilation_m2 * (x_differential_mask_nonzeros[i])
                    elif abs_diff[i] != 0 and x_differential_mask_nonzeros[i] == 0 and y_differential_mask_nonzeros[i] != 0:
                        inc_x[i+1] = dilation_m1 * (-1 * y_differential_mask_nonzeros[i])
                    elif abs_diff[i] != 0 and x_differential_mask_nonzeros[i] != 0 and y_differential_mask_nonzeros[i] == 0:
                        inc_y[i+1] = dilation_m1 * (x_differential_mask_nonzeros[i])

                    elif abs_diff[i] != 0 and abs_diff[i] >= 3:
                        if abs(x_differential[i]) > abs(y_differential[i]):
                            inc_y[i+1] = dilation_m1 * (x_differential_mask_nonzeros[i])
                        else:
                            inc_x[i+1] = dilation_m1 * (-1 * y_differential_mask_nonzeros[i])
                    else:
                        inc_x[i+1] = dilation_m2 * (-1 * y_differential_mask_nonzeros[i])
                        inc_y[i+1] = dilation_m2 * (x_differential_mask_nonzeros[i])

                inc_x[0] = inc_x[-1]
                inc_y[0] = inc_y[-1]

                con_scaled = con_ind * 1

                con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:]
                con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:]

                con_scaled[:,0, 1][con_scaled[:,0, 1] < 0] = 0
                con_scaled[:,0, 0][con_scaled[:,0, 0] < 0] = 0

                area_scaled = cv2.contourArea(con_scaled.astype(np.int32))

                con_ind = con_ind.astype(np.int32)

                results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False)
                           for ind in range(len(con_scaled[:,0, 1]))]
                results = np.array(results)
                #print(results,'results')
                results[results == 0] = 1

                diff_result = np.diff(results)

                indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind] == 2]
                indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind] == -2]

                if results[0] == 1:
                    con_scaled[:indices_m2[0]+1, 0, 1] = con_ind[:indices_m2[0]+1, 0, 1]
                    con_scaled[:indices_m2[0]+1, 0, 0] = con_ind[:indices_m2[0]+1, 0, 0]
                    #indices_2 = indices_2[1:]
                    indices_m2 = indices_m2[1:]

                if len(indices_2) > len(indices_m2):
                    con_scaled[indices_2[-1]+1:, 0, 1] = con_ind[indices_2[-1]+1:, 0, 1]
                    con_scaled[indices_2[-1]+1:, 0, 0] = con_ind[indices_2[-1]+1:, 0, 0]
                    indices_2 = indices_2[:-1]

                for ii in range(len(indices_2)):
                    con_scaled[indices_2[ii]+1:indices_m2[ii]+1, 0, 1] = con_scaled[indices_2[ii], 0, 1]
                    con_scaled[indices_2[ii]+1:indices_m2[ii]+1, 0, 0] = con_scaled[indices_2[ii], 0, 0]

                all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1]
                all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0]
        return all_found_textline_polygons

    def dilate_textregions_contours(self, all_found_textline_polygons):
        #print(all_found_textline_polygons)
        for j in range(len(all_found_textline_polygons)):
            con_ind = all_found_textline_polygons[j]
            #print(len(con_ind[:,0,0]),'con_ind[:,0,0]')
            area = cv2.contourArea(con_ind)
            con_ind = con_ind.astype(float)

            x_differential = np.diff(con_ind[:,0,0])
            y_differential = np.diff(con_ind[:,0,1])

            x_differential = gaussian_filter1d(x_differential, 0.1)
            y_differential = gaussian_filter1d(y_differential, 0.1)

            x_min = float(np.min(con_ind[:,0,0]))
            y_min = float(np.min(con_ind[:,0,1]))

            x_max = float(np.max(con_ind[:,0,0]))
            y_max = float(np.max(con_ind[:,0,1]))

            x_differential_mask_nonzeros = [ind/abs(ind) if ind != 0 else ind for ind in x_differential]
            y_differential_mask_nonzeros = [ind/abs(ind) if ind != 0 else ind for ind in y_differential]

            abs_diff = abs(abs(x_differential) - abs(y_differential))

            inc_x = np.zeros(len(x_differential)+1)
            inc_y = np.zeros(len(x_differential)+1)

            if (y_max-y_min) <= (x_max-x_min):
                dilation_m1 = round(area / (x_max-x_min) * 0.12)
            else:
                dilation_m1 = round(area / (y_max-y_min) * 0.12)

            if dilation_m1 > 8:
                dilation_m1 = 8
            if dilation_m1 < 6:
                dilation_m1 = 6
            #print(dilation_m1, 'dilation_m1')
            dilation_m1 = 6
            dilation_m2 = int(dilation_m1/2.) + 1

            for i in range(len(x_differential)):
                if abs_diff[i] == 0:
                    inc_x[i+1] = dilation_m2 * (-1 * y_differential_mask_nonzeros[i])
                    inc_y[i+1] = dilation_m2 * (x_differential_mask_nonzeros[i])
                elif abs_diff[i] != 0 and x_differential_mask_nonzeros[i] == 0 and y_differential_mask_nonzeros[i] != 0:
                    inc_x[i+1] = dilation_m1 * (-1 * y_differential_mask_nonzeros[i])
                elif abs_diff[i] != 0 and x_differential_mask_nonzeros[i] != 0 and y_differential_mask_nonzeros[i] == 0:
                    inc_y[i+1] = dilation_m1 * (x_differential_mask_nonzeros[i])

                elif abs_diff[i] != 0 and abs_diff[i] >= 3:
                    if abs(x_differential[i]) > abs(y_differential[i]):
                        inc_y[i+1] = dilation_m1 * (x_differential_mask_nonzeros[i])
                    else:
                        inc_x[i+1] = dilation_m1 * (-1 * y_differential_mask_nonzeros[i])
                else:
                    inc_x[i+1] = dilation_m2 * (-1 * y_differential_mask_nonzeros[i])
                    inc_y[i+1] = dilation_m2 * (x_differential_mask_nonzeros[i])

            inc_x[0] = inc_x[-1]
            inc_y[0] = inc_y[-1]

            con_scaled = con_ind * 1

            con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:]
            con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:]

            con_scaled[:,0, 1][con_scaled[:,0, 1] < 0] = 0
            con_scaled[:,0, 0][con_scaled[:,0, 0] < 0] = 0

            area_scaled = cv2.contourArea(con_scaled.astype(np.int32))

            con_ind = con_ind.astype(np.int32)

            results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False)
                       for ind in range(len(con_scaled[:,0, 1]))]
            results = np.array(results)
            #print(results,'results')
            results[results == 0] = 1

            diff_result = np.diff(results)
            indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind] == 2]
            indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind] == -2]

            if results[0] == 1:
                con_scaled[:indices_m2[0]+1, 0, 1] = con_ind[:indices_m2[0]+1, 0, 1]
                con_scaled[:indices_m2[0]+1, 0, 0] = con_ind[:indices_m2[0]+1, 0, 0]
                #indices_2 = indices_2[1:]
                indices_m2 = indices_m2[1:]

            if len(indices_2) > len(indices_m2):
                con_scaled[indices_2[-1]+1:, 0, 1] = con_ind[indices_2[-1]+1:, 0, 1]
                con_scaled[indices_2[-1]+1:, 0, 0] = con_ind[indices_2[-1]+1:, 0, 0]
                indices_2 = indices_2[:-1]

            for ii in range(len(indices_2)):
                con_scaled[indices_2[ii]+1:indices_m2[ii]+1, 0, 1] = con_scaled[indices_2[ii], 0, 1]
                con_scaled[indices_2[ii]+1:indices_m2[ii]+1, 0, 0] = con_scaled[indices_2[ii], 0, 0]

            all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1]
            all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0]
        return all_found_textline_polygons

    def dilate_textline_contours(self, all_found_textline_polygons):
        for j in range(len(all_found_textline_polygons)):
            for ij in range(len(all_found_textline_polygons[j])):
                con_ind = all_found_textline_polygons[j][ij]
                area = cv2.contourArea(con_ind)

                con_ind = con_ind.astype(float)

                x_differential = np.diff(con_ind[:,0,0])
                y_differential = np.diff(con_ind[:,0,1])

                x_differential = gaussian_filter1d(x_differential, 3)
                y_differential = gaussian_filter1d(y_differential, 3)

                x_min = float(np.min(con_ind[:,0,0]))
                y_min = float(np.min(con_ind[:,0,1]))

                x_max = float(np.max(con_ind[:,0,0]))
                y_max = float(np.max(con_ind[:,0,1]))

                x_differential_mask_nonzeros = [ind/abs(ind) if ind != 0 else ind for ind in x_differential]
                y_differential_mask_nonzeros = [ind/abs(ind) if ind != 0 else ind for ind in y_differential]

                abs_diff = abs(abs(x_differential) - abs(y_differential))

                inc_x = np.zeros(len(x_differential)+1)
                inc_y = np.zeros(len(x_differential)+1)

                if (y_max-y_min) <= (x_max-x_min):
                    dilation_m1 = round(area / (x_max-x_min) * 0.35)
                else:
                    dilation_m1 = round(area / (y_max-y_min) * 0.35)

                if dilation_m1 > 12:
                    dilation_m1 = 12
                if dilation_m1 < 4:
                    dilation_m1 = 4
                #print(dilation_m1, 'dilation_m1')
                dilation_m2 = int(dilation_m1/2.) + 1

                for i in range(len(x_differential)):
                    if abs_diff[i] == 0:
                        inc_x[i+1] = dilation_m2 * (-1 * y_differential_mask_nonzeros[i])
                        inc_y[i+1] = dilation_m2 * (x_differential_mask_nonzeros[i])
                    elif abs_diff[i] != 0 and x_differential_mask_nonzeros[i] == 0 and y_differential_mask_nonzeros[i] != 0:
                        inc_x[i+1] = dilation_m1 * (-1 * y_differential_mask_nonzeros[i])
                    elif abs_diff[i] != 0 and x_differential_mask_nonzeros[i] != 0 and y_differential_mask_nonzeros[i] == 0:
                        inc_y[i+1] = dilation_m1 * (x_differential_mask_nonzeros[i])

                    elif abs_diff[i] != 0 and abs_diff[i] >= 3:
                        if abs(x_differential[i]) > abs(y_differential[i]):
                            inc_y[i+1] = dilation_m1 * (x_differential_mask_nonzeros[i])
                        else:
                            inc_x[i+1] = dilation_m1 * (-1 * y_differential_mask_nonzeros[i])
                    else:
                        inc_x[i+1] = dilation_m2 * (-1 * y_differential_mask_nonzeros[i])
                        inc_y[i+1] = dilation_m2 * (x_differential_mask_nonzeros[i])

                inc_x[0] = inc_x[-1]
                inc_y[0] = inc_y[-1]

                con_scaled = con_ind * 1

                con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:]
                con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:]

                con_scaled[:,0, 1][con_scaled[:,0, 1] < 0] = 0
                con_scaled[:,0, 0][con_scaled[:,0, 0] < 0] = 0

                con_ind = con_ind.astype(np.int32)

                results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False)
                           for ind in range(len(con_scaled[:,0, 1]))]
                results = np.array(results)
                results[results == 0] = 1

                diff_result = np.diff(results)

                indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind] == 2]
                indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind] == -2]

                if results[0] == 1:
                    con_scaled[:indices_m2[0]+1, 0, 1] = con_ind[:indices_m2[0]+1, 0, 1]
                    con_scaled[:indices_m2[0]+1, 0, 0] = con_ind[:indices_m2[0]+1, 0, 0]
                    indices_m2 = indices_m2[1:]

                if len(indices_2) > len(indices_m2):
                    con_scaled[indices_2[-1]+1:, 0, 1] = con_ind[indices_2[-1]+1:, 0, 1]
                    con_scaled[indices_2[-1]+1:, 0, 0] = con_ind[indices_2[-1]+1:, 0, 0]
                    indices_2 = indices_2[:-1]

                for ii in range(len(indices_2)):
                    con_scaled[indices_2[ii]+1:indices_m2[ii]+1, 0, 1] = con_scaled[indices_2[ii], 0, 1]
                    con_scaled[indices_2[ii]+1:indices_m2[ii]+1, 0, 0] = con_scaled[indices_2[ii], 0, 0]

                all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1]
                all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0]
        return all_found_textline_polygons

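    # filter_contours_inside_a_bigger_one removes nested detections: for text
    # regions, small contours whose centroid lies inside a big region (or a
    # marginal) are dropped; for textlines, a line is dropped when its
    # centroid falls inside another line of at least 1.5x its area.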
    def filter_contours_inside_a_bigger_one(self, contours, contours_d_ordered, image, marginal_cnts=None, type_contour="textregion"):
        if type_contour == "textregion":
            areas = [cv2.contourArea(contours[j]) for j in range(len(contours))]
            area_tot = image.shape[0] * image.shape[1]

            M_main = [cv2.moments(contours[j])
                      for j in range(len(contours))]
            cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
            cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]

            areas_ratio = np.array(areas) / area_tot
            contours_index_small = [ind for ind in range(len(contours)) if areas_ratio[ind] < 1e-3]
            contours_index_big = [ind for ind in range(len(contours)) if areas_ratio[ind] >= 1e-3]

            #contours_big = [contours[ind] for ind in contours_index_big]
            indexes_to_be_removed = []
            for ind_small in contours_index_small:
                results = [cv2.pointPolygonTest(contours[ind], (cx_main[ind_small], cy_main[ind_small]), False)
                           for ind in contours_index_big]
                if marginal_cnts:
                    results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], (cx_main[ind_small], cy_main[ind_small]), False)
                                        for ind in range(len(marginal_cnts))]
                    results_marginal = np.array(results_marginal)

                    if np.any(results_marginal == 1):
                        indexes_to_be_removed.append(ind_small)

                results = np.array(results)

                if np.any(results == 1):
                    indexes_to_be_removed.append(ind_small)

            if len(indexes_to_be_removed) > 0:
                indexes_to_be_removed = np.unique(indexes_to_be_removed)
                indexes_to_be_removed = np.sort(indexes_to_be_removed)[::-1]
                for ind in indexes_to_be_removed:
                    contours.pop(ind)
                    if len(contours_d_ordered) > 0:
                        contours_d_ordered.pop(ind)

            return contours, contours_d_ordered

        else:
            contours_txtline_of_all_textregions = []
            indexes_of_textline_tot = []
            index_textline_inside_textregion = []

            for jj in range(len(contours)):
                contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours[jj]

                ind_textline_inside_tr = list(range(len(contours[jj])))
                index_textline_inside_textregion = index_textline_inside_textregion + ind_textline_inside_tr
                ind_ins = [jj] * len(contours[jj])
                indexes_of_textline_tot = indexes_of_textline_tot + ind_ins

            M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j])
                          for j in range(len(contours_txtline_of_all_textregions))]
            cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))]
            cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))]

            areas_tot = [cv2.contourArea(con_ind) for con_ind in contours_txtline_of_all_textregions]
            area_tot_tot = image.shape[0] * image.shape[1]

            textregion_index_to_del = []
            textline_in_textregion_index_to_del = []
            for ij in range(len(contours_txtline_of_all_textregions)):
                args_all = list(np.array(range(len(contours_txtline_of_all_textregions))))
                args_all.pop(ij)

                areas_without = np.array(areas_tot)[args_all]
                area_of_con_interest = areas_tot[ij]

                args_with_bigger_area = np.array(args_all)[areas_without > 1.5*area_of_con_interest]

                if len(args_with_bigger_area) > 0:
                    results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False)
                               for ind in args_with_bigger_area]
                    results = np.array(results)
                    if np.any(results == 1):
                        #print(indexes_of_textline_tot[ij], index_textline_inside_textregion[ij])
                        textregion_index_to_del.append(int(indexes_of_textline_tot[ij]))
                        textline_in_textregion_index_to_del.append(int(index_textline_inside_textregion[ij]))
                        #contours[int(indexes_of_textline_tot[ij])].pop(int(index_textline_inside_textregion[ij]))

            textregion_index_to_del = np.array(textregion_index_to_del)
            textline_in_textregion_index_to_del = np.array(textline_in_textregion_index_to_del)
            for ind_u_a_trs in np.unique(textregion_index_to_del):
                textline_in_textregion_index_to_del_ind = textline_in_textregion_index_to_del[textregion_index_to_del == ind_u_a_trs]
                textline_in_textregion_index_to_del_ind = np.sort(textline_in_textregion_index_to_del_ind)[::-1]
                for ittrd in textline_in_textregion_index_to_del_ind:
                    contours[ind_u_a_trs].pop(ittrd)

            return contours

    def filter_contours_without_textline_inside(
            self, contours, text_con_org, contours_textline, contours_only_text_parent_d_ordered, conf_contours_textregions):
        ###contours_txtline_of_all_textregions = []
        ###for jj in range(len(contours_textline)):
            ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj]

        ###M_main_textline = [cv2.moments(contours_txtline_of_all_textregions[j])
        ###                   for j in range(len(contours_txtline_of_all_textregions))]
        ###cx_main_textline = [(M_main_textline[j]["m10"] / (M_main_textline[j]["m00"] + 1e-32))
        ###                    for j in range(len(M_main_textline))]
        ###cy_main_textline = [(M_main_textline[j]["m01"] / (M_main_textline[j]["m00"] + 1e-32))
        ###                    for j in range(len(M_main_textline))]

        ###M_main = [cv2.moments(contours[j]) for j in range(len(contours))]
        ###cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
        ###cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]

        ###contours_with_textline = []
        ###for ind_tr, con_tr in enumerate(contours):
            ###results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False)
            ###           for index_textline_con in range(len(contours_txtline_of_all_textregions)) ]
            ###results = np.array(results)
            ###if np.any(results==1):
                ###contours_with_textline.append(con_tr)

        textregion_index_to_del = []
        for index_textregion, textlines_textregion in enumerate(contours_textline):
            if len(textlines_textregion) == 0:
                textregion_index_to_del.append(index_textregion)

        uniqe_args_trs = np.unique(textregion_index_to_del)
        uniqe_args_trs_sorted = np.sort(uniqe_args_trs)[::-1]

        for ind_u_a_trs in uniqe_args_trs_sorted:
            conf_contours_textregions.pop(ind_u_a_trs)
            contours.pop(ind_u_a_trs)
            contours_textline.pop(ind_u_a_trs)
            text_con_org.pop(ind_u_a_trs)
            if len(contours_only_text_parent_d_ordered) > 0:
                contours_only_text_parent_d_ordered.pop(ind_u_a_trs)

        return contours, text_con_org, conf_contours_textregions, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours)))

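    # The filter above deletes by index from several parallel lists at once.
    # Popping in descending index order keeps the remaining indices valid, as
    # this small stand-alone sketch (with made-up data) illustrates:
    #
    #   regions = ['a', 'b', 'c', 'd']
    #   to_del = [1, 3]
    #   for idx in sorted(set(to_del), reverse=True):
    #       regions.pop(idx)
    #   assert regions == ['a', 'c']
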
    def dilate_textlines(self, all_found_textline_polygons):
        for j in range(len(all_found_textline_polygons)):
            for i in range(len(all_found_textline_polygons[j])):
                con_ind = all_found_textline_polygons[j][i]
                con_ind = con_ind.astype(float)

                x_differential = np.diff(con_ind[:, 0, 0])
                y_differential = np.diff(con_ind[:, 0, 1])

                x_min = float(np.min(con_ind[:, 0, 0]))
                y_min = float(np.min(con_ind[:, 0, 1]))

                x_max = float(np.max(con_ind[:, 0, 0]))
                y_max = float(np.max(con_ind[:, 0, 1]))

                if (y_max - y_min) > (x_max - x_min) and (x_max - x_min) < 70:
                    x_biger_than_x = np.abs(x_differential) > np.abs(y_differential)
                    mult = x_biger_than_x * x_differential

                    arg_min_mult = np.argmin(mult)
                    arg_max_mult = np.argmax(mult)

                    if y_differential[0] == 0:
                        y_differential[0] = 0.1
                    if y_differential[-1] == 0:
                        y_differential[-1] = 0.1
                    y_differential = [y_differential[ind] if y_differential[ind] != 0
                                      else 0.5 * (y_differential[ind - 1] + y_differential[ind + 1])
                                      for ind in range(len(y_differential))]

                    if y_differential[0] == 0.1:
                        y_differential[0] = y_differential[1]
                    if y_differential[-1] == 0.1:
                        y_differential[-1] = y_differential[-2]
                    y_differential.append(y_differential[0])

                    y_differential = [-1 if y_differential[ind] < 0 else 1
                                      for ind in range(len(y_differential))]
                    y_differential = self.return_it_in_two_groups(y_differential)
                    y_differential = np.array(y_differential)

                    con_scaled = con_ind * 1
                    con_scaled[:, 0, 0] = con_ind[:, 0, 0] - 8 * y_differential
                    con_scaled[arg_min_mult, 0, 1] = con_ind[arg_min_mult, 0, 1] + 8
                    con_scaled[arg_min_mult + 1, 0, 1] = con_ind[arg_min_mult + 1, 0, 1] + 8

                    try:
                        con_scaled[arg_min_mult - 1, 0, 1] = con_ind[arg_min_mult - 1, 0, 1] + 5
                        con_scaled[arg_min_mult + 2, 0, 1] = con_ind[arg_min_mult + 2, 0, 1] + 5
                    except:
                        pass

                    con_scaled[arg_max_mult, 0, 1] = con_ind[arg_max_mult, 0, 1] - 8
                    con_scaled[arg_max_mult + 1, 0, 1] = con_ind[arg_max_mult + 1, 0, 1] - 8

                    try:
                        con_scaled[arg_max_mult - 1, 0, 1] = con_ind[arg_max_mult - 1, 0, 1] - 5
                        con_scaled[arg_max_mult + 2, 0, 1] = con_ind[arg_max_mult + 2, 0, 1] - 5
                    except:
                        pass

                else:
                    y_biger_than_x = np.abs(y_differential) > np.abs(x_differential)
                    mult = y_biger_than_x * y_differential

                    arg_min_mult = np.argmin(mult)
                    arg_max_mult = np.argmax(mult)

                    if x_differential[0] == 0:
                        x_differential[0] = 0.1
                    if x_differential[-1] == 0:
                        x_differential[-1] = 0.1
                    x_differential = [x_differential[ind] if x_differential[ind] != 0
                                      else 0.5 * (x_differential[ind - 1] + x_differential[ind + 1])
                                      for ind in range(len(x_differential))]

                    if x_differential[0] == 0.1:
                        x_differential[0] = x_differential[1]
                    if x_differential[-1] == 0.1:
                        x_differential[-1] = x_differential[-2]
                    x_differential.append(x_differential[0])

                    x_differential = [-1 if x_differential[ind] < 0 else 1
                                      for ind in range(len(x_differential))]
                    x_differential = self.return_it_in_two_groups(x_differential)
                    x_differential = np.array(x_differential)

                    con_scaled = con_ind * 1
                    con_scaled[:, 0, 1] = con_ind[:, 0, 1] + 8 * x_differential
                    con_scaled[arg_min_mult, 0, 0] = con_ind[arg_min_mult, 0, 0] + 8
                    con_scaled[arg_min_mult + 1, 0, 0] = con_ind[arg_min_mult + 1, 0, 0] + 8

                    try:
                        con_scaled[arg_min_mult - 1, 0, 0] = con_ind[arg_min_mult - 1, 0, 0] + 5
                        con_scaled[arg_min_mult + 2, 0, 0] = con_ind[arg_min_mult + 2, 0, 0] + 5
                    except:
                        pass

                    con_scaled[arg_max_mult, 0, 0] = con_ind[arg_max_mult, 0, 0] - 8
                    con_scaled[arg_max_mult + 1, 0, 0] = con_ind[arg_max_mult + 1, 0, 0] - 8

                    try:
                        con_scaled[arg_max_mult - 1, 0, 0] = con_ind[arg_max_mult - 1, 0, 0] - 5
                        con_scaled[arg_max_mult + 2, 0, 0] = con_ind[arg_max_mult + 2, 0, 0] - 5
                    except:
                        pass

                con_scaled[:, 0, 1][con_scaled[:, 0, 1] < 0] = 0
                con_scaled[:, 0, 0][con_scaled[:, 0, 0] < 0] = 0

                all_found_textline_polygons[j][i][:, 0, 1] = con_scaled[:, 0, 1]
                all_found_textline_polygons[j][i][:, 0, 0] = con_scaled[:, 0, 0]

        return all_found_textline_polygons

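    # dilate_textlines() shifts each polygon point outward by a fixed offset
    # whose sign follows the local direction of travel along the contour. A
    # rough, simplified sketch of that idea on a toy contour (assumed shapes
    # only; OpenCV contours are (N, 1, 2) arrays, and the zero-delta special
    # cases handled above are skipped here):
    #
    #   import numpy as np
    #   con = np.array([[[0, 0]], [[10, 0]], [[10, 5]], [[0, 5]]], dtype=float)
    #   y_diff = np.diff(con[:, 0, 1])                  # 3 segment deltas
    #   signs = np.where(np.append(y_diff, y_diff[0]) < 0, -1, 1)
    #   con[:, 0, 0] -= 8 * signs                       # push x by 8 px per point
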
    def delete_regions_without_textlines(
            self, slopes, all_found_textline_polygons, boxes_text, txt_con_org,
            contours_only_text_parent, index_by_text_par_con):

        slopes_rem = []
        all_found_textline_polygons_rem = []
        boxes_text_rem = []
        txt_con_org_rem = []
        contours_only_text_parent_rem = []
        index_by_text_par_con_rem = []

        for i, ind_con in enumerate(all_found_textline_polygons):
            if len(ind_con):
                all_found_textline_polygons_rem.append(ind_con)
                slopes_rem.append(slopes[i])
                boxes_text_rem.append(boxes_text[i])
                txt_con_org_rem.append(txt_con_org[i])
                contours_only_text_parent_rem.append(contours_only_text_parent[i])
                index_by_text_par_con_rem.append(index_by_text_par_con[i])

        index_sort = np.argsort(index_by_text_par_con_rem)
        indexes_new = np.array(range(len(index_by_text_par_con_rem)))

        index_by_text_par_con_rem_sort = [indexes_new[index_sort == j][0]
                                          for j in range(len(index_by_text_par_con_rem))]

        return (slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem,
                contours_only_text_parent_rem, index_by_text_par_con_rem_sort)

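    # The index re-mapping above converts the surviving original reading-order
    # keys into a dense 0..n-1 ordering that preserves their relative order.
    # Sketch with invented indices:
    #
    #   import numpy as np
    #   kept = [7, 2, 5]                        # original reading-order keys
    #   order = np.argsort(kept)                # [1, 2, 0]
    #   new = np.array(range(len(kept)))
    #   ranks = [new[order == j][0] for j in range(len(kept))]
    #   assert ranks == [2, 0, 1]               # 7 is largest, 2 is smallest
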
    def run(self, image_filename : Optional[str] = None, dir_in : Optional[str] = None, overwrite : bool = False):
        """
        Get image and scales, then extract the page of a scanned image
        """
        self.logger.debug("enter run")
        t0_tot = time.time()

        if dir_in:
            self.ls_imgs = os.listdir(dir_in)
        elif image_filename:
            self.ls_imgs = [image_filename]
        else:
            raise ValueError("run requires either a single image filename or a directory")

        for img_filename in self.ls_imgs:
            self.logger.info(img_filename)
            t0 = time.time()

            self.reset_file_name_dir(os.path.join(dir_in or "", img_filename))
            #print("text region early -11 in %.1fs", time.time() - t0)
            if os.path.exists(self.writer.output_filename):
                if overwrite:
                    self.logger.warning("will overwrite existing output file '%s'", self.writer.output_filename)
                else:
                    self.logger.warning("will skip input for existing output file '%s'", self.writer.output_filename)
                    continue

            pcgts = self.run_single()
            self.logger.info("Job done in %.1fs", time.time() - t0)
            #print("Job done in %.1fs" % (time.time() - t0))
            self.writer.write_pagexml(pcgts)

        if dir_in:
            self.logger.info("All jobs done in %.1fs", time.time() - t0_tot)
            print("all jobs done in %.1fs" % (time.time() - t0_tot))

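    # Typical driver usage (hypothetical paths; the constructor arguments are
    # those of this class elsewhere in the module and are not repeated here):
    #
    #   eynollah = Eynollah(...)                     # configured instance
    #   eynollah.run(dir_in="/data/scans")           # batch over a directory
    #   eynollah.run(image_filename="/data/p1.tif")  # or a single image
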
    def run_single(self):
        t0 = time.time()
        img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
        self.logger.info("Enhancing took %.1fs ", time.time() - t0)
        if self.extract_only_images:
            text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \
                self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
            ocr_all_textlines = None
            pcgts = self.writer.build_pagexml_no_full_layout(
                [], page_coord, [], [], [], [],
                polygons_of_images, [], [], [], [], [],
                cont_page, [], [], ocr_all_textlines, [])
            if self.plotter:
                self.plotter.write_images_into_directory(polygons_of_images, image_page)
            return pcgts

        if self.skip_layout_and_reading_order:
            _, _, _, textline_mask_tot_ea, img_bin_light, _ = \
                self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier,
                                         skip_layout_and_reading_order=self.skip_layout_and_reading_order)

            page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = \
                self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light)

            ##all_found_textline_polygons = self.scale_contours_new(textline_mask_tot_ea)

            cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea)
            all_found_textline_polygons = filter_contours_area_of_image(
                textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001)

            all_found_textline_polygons = [all_found_textline_polygons]

            all_found_textline_polygons = self.dilate_textregions_contours_textline_version(
                all_found_textline_polygons)
            all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                all_found_textline_polygons, None, textline_mask_tot_ea, type_contour="textline")

            order_text_new = [0]
            slopes = [0]
            id_of_texts_tot = ['region_0001']

            polygons_of_images = []
            slopes_marginals = []
            polygons_of_marginals = []
            all_found_textline_polygons_marginals = []
            all_box_coord_marginals = []
            polygons_lines_xml = []
            contours_tables = []
            ocr_all_textlines = None
            conf_contours_textregions = None
            pcgts = self.writer.build_pagexml_no_full_layout(
                cont_page, page_coord, order_text_new, id_of_texts_tot,
                all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals,
                all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals,
                cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions)
            return pcgts

        #print("text region early -1 in %.1fs", time.time() - t0)
        t1 = time.time()
        if self.light_version:
            text_regions_p_1, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light, confidence_matrix = \
                self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
            #print("text region early -2 in %.1fs", time.time() - t0)

            if num_col_classifier == 1 or num_col_classifier == 2:
                if num_col_classifier == 1:
                    img_w_new = 1000
                else:
                    img_w_new = 1300
                img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1]

                textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea, img_h_new, img_w_new)

                slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew)
            else:
                slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
            #print("text region early -2,5 in %.1fs", time.time() - t0)
            #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t)
            num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \
                text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \
                    self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea,
                                                        num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light)
            #self.logger.info("run graphics %.1fs ", time.time() - t1t)
            #print("text region early -3 in %.1fs", time.time() - t0)
            textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea)
            #print("text region early -4 in %.1fs", time.time() - t0)
        else:
            text_regions_p_1, erosion_hurts, polygons_lines_xml = \
                self.get_regions_from_xy_2models(img_res, is_image_enhanced,
                                                 num_col_classifier)
            self.logger.info("Textregion detection took %.1fs ", time.time() - t1)
            confidence_matrix = np.zeros((text_regions_p_1.shape[:2]))

            t1 = time.time()
            num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \
                text_regions_p_1, cont_page, table_prediction = \
                    self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts)
            self.logger.info("Graphics detection took %.1fs ", time.time() - t1)
            #self.logger.info('cont_page %s', cont_page)
            #plt.imshow(table_prediction)
            #plt.show()

        if not num_col:
            self.logger.info("No columns detected, outputting an empty PAGE-XML")
            ocr_all_textlines = None
            pcgts = self.writer.build_pagexml_no_full_layout(
                [], page_coord, [], [], [], [], [], [], [], [], [], [],
                cont_page, [], [], ocr_all_textlines, [])
            return pcgts

        #print("text region early in %.1fs", time.time() - t0)
        t1 = time.time()
        if not self.light_version:
            textline_mask_tot_ea = self.run_textline(image_page)
            self.logger.info("textline detection took %.1fs", time.time() - t1)
            t1 = time.time()
            slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
            self.logger.info("deskewing took %.1fs", time.time() - t1)
        elif num_col_classifier in (1, 2):
            org_h_l_m = textline_mask_tot_ea.shape[0]
            org_w_l_m = textline_mask_tot_ea.shape[1]
            if num_col_classifier == 1:
                img_w_new = 2000
            else:
                img_w_new = 2400
            img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1]

            image_page = resize_image(image_page, img_h_new, img_w_new)
            textline_mask_tot_ea = resize_image(textline_mask_tot_ea, img_h_new, img_w_new)
            mask_images = resize_image(mask_images, img_h_new, img_w_new)
            mask_lines = resize_image(mask_lines, img_h_new, img_w_new)
            text_regions_p_1 = resize_image(text_regions_p_1, img_h_new, img_w_new)
            table_prediction = resize_image(table_prediction, img_h_new, img_w_new)

        textline_mask_tot, text_regions_p, image_page_rotated = \
            self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines,
                               num_col_classifier, slope_deskew, text_regions_p_1, table_prediction)

        if self.light_version and num_col_classifier in (1, 2):
            image_page = resize_image(image_page, org_h_l_m, org_w_l_m)
            textline_mask_tot_ea = resize_image(textline_mask_tot_ea, org_h_l_m, org_w_l_m)
            text_regions_p = resize_image(text_regions_p, org_h_l_m, org_w_l_m)
            textline_mask_tot = resize_image(textline_mask_tot, org_h_l_m, org_w_l_m)
            text_regions_p_1 = resize_image(text_regions_p_1, org_h_l_m, org_w_l_m)
            table_prediction = resize_image(table_prediction, org_h_l_m, org_w_l_m)
            image_page_rotated = resize_image(image_page_rotated, org_h_l_m, org_w_l_m)

        self.logger.info("detection of marginals took %.1fs", time.time() - t1)
        #print("text region early 2 marginal in %.1fs", time.time() - t0)
        ## after this point it takes a long time
        t1 = time.time()
        if not self.full_layout:
            polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \
                boxes, boxes_d, polygons_of_marginals, contours_tables = \
                    self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
                                                  num_col_classifier, table_prediction, erosion_hurts)
            ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals)
        else:
            polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \
                regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \
                    self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
                                               num_col_classifier, img_only_regions, table_prediction, erosion_hurts,
                                               img_bin_light if self.light_version else None)
            ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals)
        if self.light_version:
            drop_label_in_full_layout = 4
            textline_mask_tot_ea_org[img_revised_tab == drop_label_in_full_layout] = 0

        text_only = ((img_revised_tab[:, :] == 1)) * 1
        if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
            text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1

        #print("text region early 2 in %.1fs", time.time() - t0)
        ###min_con_area = 0.000005
        contours_only_text, hir_on_text = return_contours_of_image(text_only)
        contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text)
        if len(contours_only_text_parent) > 0:
            areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent])
            areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1])
            #self.logger.info('areas_cnt_text %s', areas_cnt_text)
            contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)]
            contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent)
                                         if areas_cnt_text[jz] > MIN_AREA_REGION]
            areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION]
            index_con_parents = np.argsort(areas_cnt_text_parent)

            contours_only_text_parent = self.return_list_of_contours_with_desired_order(
                contours_only_text_parent, index_con_parents)

            ##try:
                ##contours_only_text_parent = \
                    ##list(np.array(contours_only_text_parent,dtype=object)[index_con_parents])
            ##except:
                ##contours_only_text_parent = \
                    ##list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents])
            ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents])
            areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(
                areas_cnt_text_parent, index_con_parents)

            cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest])
            cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent)

            if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
                contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d)
                contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d)

                areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d])
                areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1])

                if len(areas_cnt_text_d) > 0:
                    contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)]
                    index_con_parents_d = np.argsort(areas_cnt_text_d)
                    contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(
                        contours_only_text_parent_d, index_con_parents_d)
                    #try:
                        #contours_only_text_parent_d = \
                            #list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d])
                    #except:
                        #contours_only_text_parent_d = \
                            #list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d])
                    #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d])
                    areas_cnt_text_d = self.return_list_of_contours_with_desired_order(
                        areas_cnt_text_d, index_con_parents_d)

                    cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d])
                    cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d)
                    try:
                        if len(cx_bigest_d) >= 5:
                            cx_bigest_d_last5 = cx_bigest_d[-5:]
                            cy_biggest_d_last5 = cy_biggest_d[-5:]
                            dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 +
                                                 (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2)
                                       for j in range(len(cy_biggest_d_last5))]
                            ind_largest = len(cx_bigest_d) - 5 + np.argmin(dists_d)
                        else:
                            cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):]
                            cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):]
                            dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 +
                                                 (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2)
                                       for j in range(len(cy_biggest_d_last5))]
                            ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d)

                        cx_bigest_d_big[0] = cx_bigest_d[ind_largest]
                        cy_biggest_d_big[0] = cy_biggest_d[ind_largest]
                    except Exception as why:
                        self.logger.error(why)

                    (h, w) = text_only.shape[:2]
                    center = (w // 2.0, h // 2.0)
                    M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0)
                    M_22 = np.array(M)[:2, :2]
                    p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big])
                    x_diff = p_big[0] - cx_bigest_d_big
                    y_diff = p_big[1] - cy_biggest_d_big

                    contours_only_text_parent_d_ordered = []
                    for i in range(len(contours_only_text_parent)):
                        p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]])
                        p[0] = p[0] - x_diff[0]
                        p[1] = p[1] - y_diff[0]
                        dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 +
                                           (p[1] - cy_biggest_d[j]) ** 2)
                                 for j in range(len(cx_bigest_d))]
                        contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)])
                        # img2=np.zeros((text_only.shape[0],text_only.shape[1],3))
                        # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1))
                        # plt.imshow(img2[:,:,0])
                        # plt.show()
                else:
                    contours_only_text_parent_d_ordered = []
                    contours_only_text_parent_d = []
                    contours_only_text_parent = []

            else:
                contours_only_text_parent_d_ordered = []
                contours_only_text_parent_d = []
                #contours_only_text_parent = []
        if not len(contours_only_text_parent):
            # stop early
            empty_marginals = [[]] * len(polygons_of_marginals)
            if self.full_layout:
                pcgts = self.writer.build_pagexml_full_layout(
                    [], [], page_coord, [], [], [], [], [], [],
                    polygons_of_images, contours_tables, [],
                    polygons_of_marginals, empty_marginals, empty_marginals, [], [], [],
                    cont_page, polygons_lines_xml, [], [], [])
            else:
                pcgts = self.writer.build_pagexml_no_full_layout(
                    [], page_coord, [], [], [], [],
                    polygons_of_images,
                    polygons_of_marginals, empty_marginals, empty_marginals, [], [],
                    cont_page, polygons_lines_xml, contours_tables, [], [])
            return pcgts

        #print("text region early 3 in %.1fs", time.time() - t0)
        if self.light_version:
            contours_only_text_parent = self.dilate_textregions_contours(
                contours_only_text_parent)
            contours_only_text_parent, contours_only_text_parent_d_ordered = self.filter_contours_inside_a_bigger_one(
                contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, marginal_cnts=polygons_of_marginals)
            #print("text region early 3.5 in %.1fs", time.time() - t0)
            txt_con_org, conf_contours_textregions = get_textregion_contours_in_org_image_light(
                contours_only_text_parent, self.image, slope_first, confidence_matrix, map=self.executor.map)
            #txt_con_org = self.dilate_textregions_contours(txt_con_org)
            #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent)
        else:
            txt_con_org, conf_contours_textregions = get_textregion_contours_in_org_image_light(
                contours_only_text_parent, self.image, slope_first, confidence_matrix, map=self.executor.map)
        #print("text region early 4 in %.1fs", time.time() - t0)
        boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent)
        boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals)
        #print("text region early 5 in %.1fs", time.time() - t0)
        ## after this point it takes a long time
        if not self.curved_line:
            if self.light_version:
                if self.textline_light:
                    all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \
                        all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light2(
                            txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org,
                            image_page_rotated, boxes_text, slope_deskew)
                    all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                        all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light2(
                            polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org,
                            image_page_rotated, boxes_marginals, slope_deskew)

                    #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \
                    #    self.delete_regions_without_textlines(slopes, all_found_textline_polygons,
                    #        boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con)
                    #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \
                    #    self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals,
                    #        boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals))))
                    #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons)
                    #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
                    all_found_textline_polygons = self.dilate_textregions_contours_textline_version(
                        all_found_textline_polygons)
                    all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                        all_found_textline_polygons, None, textline_mask_tot_ea_org, type_contour="textline")
                    all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(
                        all_found_textline_polygons_marginals)
                    contours_only_text_parent, txt_con_org, conf_contours_textregions, all_found_textline_polygons, contours_only_text_parent_d_ordered, \
                        index_by_text_par_con = self.filter_contours_without_textline_inside(
                            contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, conf_contours_textregions)
                else:
                    textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
                    all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \
                        index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light(
                            txt_con_org, contours_only_text_parent, textline_mask_tot_ea,
                            image_page_rotated, boxes_text, slope_deskew)
                    all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                        all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light(
                            polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea,
                            image_page_rotated, boxes_marginals, slope_deskew)
                    #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
                    #    all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
            else:
                textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
                all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \
                    all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new(
                        txt_con_org, contours_only_text_parent, textline_mask_tot_ea,
                        image_page_rotated, boxes_text, slope_deskew)
                all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                    all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new(
                        polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea,
                        image_page_rotated, boxes_marginals, slope_deskew)
        else:
            scale_param = 1
            textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2)
            all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \
                all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(
                    txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode,
                    image_page_rotated, boxes_text, text_only,
                    num_col_classifier, scale_param, slope_deskew)
            all_found_textline_polygons = small_textlines_to_parent_adherence2(
                all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier)
            all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \
                all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(
                    polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode,
                    image_page_rotated, boxes_marginals, text_only,
                    num_col_classifier, scale_param, slope_deskew)
            all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(
                all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)

        #print("text region early 6 in %.1fs", time.time() - t0)
        if self.full_layout:
            if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
                contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(
                    contours_only_text_parent_d_ordered, index_by_text_par_con)
                #try:
                    #contours_only_text_parent_d_ordered = \
                        #list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con])
                #except:
                    #contours_only_text_parent_d_ordered = \
                        #list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con])
            else:
                # takes a long time
                contours_only_text_parent_d_ordered = None
            if self.light_version:
                fun = check_any_text_region_in_model_one_is_main_or_header_light
            else:
                fun = check_any_text_region_in_model_one_is_main_or_header
            text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \
                all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \
                contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, \
                conf_contours_textregions, conf_contours_textregions_h = fun(
                    text_regions_p, regions_fully, contours_only_text_parent,
                    all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered, conf_contours_textregions)

            if self.plotter:
                self.plotter.save_plot_of_layout(text_regions_p, image_page)
                self.plotter.save_plot_of_layout_all(text_regions_p, image_page)

            pixel_img = 4
            polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img)
            all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(
                text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h,
                all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h,
                kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light)

            if not self.reading_order_machine_based:
                pixel_seps = 6
                if not self.headers_off:
                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                        num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(
                            np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2),
                            num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h)
                    else:
                        _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
                            np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),
                            num_col_classifier, self.tables, pixel_seps, contours_only_text_parent_h_d_ordered)
                elif self.headers_off:
                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                        num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(
                            np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2),
                            num_col_classifier, self.tables, pixel_seps)
                    else:
                        _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
                            np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),
                            num_col_classifier, self.tables, pixel_seps)

                if num_col_classifier >= 3:
                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                        regions_without_separators = regions_without_separators.astype(np.uint8)
                        regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6)
                    else:
                        regions_without_separators_d = regions_without_separators_d.astype(np.uint8)
                        regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)

                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                    boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(
                        splitter_y_new, regions_without_separators, matrix_of_lines_ch,
                        num_col_classifier, erosion_hurts, self.tables, self.right2left)
                else:
                    boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(
                        splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d,
                        num_col_classifier, erosion_hurts, self.tables, self.right2left)

        if self.plotter:
            self.plotter.write_images_into_directory(polygons_of_images, image_page)
        t_order = time.time()

        if self.full_layout:
            if self.reading_order_machine_based:
                order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(
                    contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
            else:
                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                    order_text_new, id_of_texts_tot = self.do_order_of_regions(
                        contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
                else:
                    order_text_new, id_of_texts_tot = self.do_order_of_regions(
                        contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d)
            self.logger.info("detection of reading order took %.1fs", time.time() - t_order)

            if self.ocr:
                ocr_all_textlines = []
            else:
                ocr_all_textlines = None
            pcgts = self.writer.build_pagexml_full_layout(
                contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot,
                all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h,
                polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals,
                all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals,
                cont_page, polygons_lines_xml, ocr_all_textlines, conf_contours_textregions, conf_contours_textregions_h)
            return pcgts

        contours_only_text_parent_h = None
        if self.reading_order_machine_based:
            order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(
                contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
        else:
            if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                order_text_new, id_of_texts_tot = self.do_order_of_regions(
                    contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
            else:
                contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(
                    contours_only_text_parent_d_ordered, index_by_text_par_con)
                #try:
                    #contours_only_text_parent_d_ordered = \
                        #list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con])
                #except:
                    #contours_only_text_parent_d_ordered = \
                        #list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con])
                order_text_new, id_of_texts_tot = self.do_order_of_regions(
                    contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d)

        if self.ocr:
            device = cuda.get_current_device()
            device.reset()
            gc.collect()
            model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
            torch.cuda.empty_cache()
            model_ocr.to(device)

            ind_tot = 0
            #cv2.imwrite('./img_out.png', image_page)
            ocr_all_textlines = []
            for indexing, ind_poly_first in enumerate(all_found_textline_polygons):
                ocr_textline_in_textregion = []
                for indexing2, ind_poly in enumerate(ind_poly_first):
                    if not (self.textline_light or self.curved_line):
                        ind_poly = copy.deepcopy(ind_poly)
                        box_ind = all_box_coord[indexing]
                        #print(ind_poly,np.shape(ind_poly), 'ind_poly')
                        #print(box_ind)
                        ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind)
                        #print(ind_poly_copy)
                        ind_poly[ind_poly < 0] = 0
                    x, y, w, h = cv2.boundingRect(ind_poly)
                    #print(ind_poly_copy, np.shape(ind_poly_copy))
                    #print(x, y, w, h, h/float(w),'ratio')
                    h2w_ratio = h / float(w)
                    mask_poly = np.zeros(image_page.shape)
                    if not self.light_version:
                        img_poly_on_img = np.copy(image_page)
                    else:
                        img_poly_on_img = np.copy(img_bin_light)
                    mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1))

                    if self.textline_light:
                        mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1)
                    img_poly_on_img[:, :, 0][mask_poly[:, :, 0] == 0] = 255
                    img_poly_on_img[:, :, 1][mask_poly[:, :, 0] == 0] = 255
                    img_poly_on_img[:, :, 2][mask_poly[:, :, 0] == 0] = 255

                    img_croped = img_poly_on_img[y:y+h, x:x+w, :]
                    #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped)
                    text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot)
                    ocr_textline_in_textregion.append(text_ocr)
                    ind_tot = ind_tot + 1
                ocr_all_textlines.append(ocr_textline_in_textregion)

        else:
            ocr_all_textlines = None
        #print(ocr_all_textlines)
        self.logger.info("detection of reading order took %.1fs", time.time() - t_order)
        pcgts = self.writer.build_pagexml_no_full_layout(
            txt_con_org, page_coord, order_text_new, id_of_texts_tot,
            all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals,
            all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals,
            cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions)
        return pcgts


class Eynollah_ocr:
    def __init__(
        self,
        dir_models,
        dir_xmls=None,
        dir_in=None,
        dir_in_bin=None,
        dir_out=None,
        dir_out_image_text=None,
        tr_ocr=False,
        batch_size=None,
        export_textline_images_and_text=False,
        do_not_mask_with_textline_contour=False,
        draw_texts_on_image=False,
        prediction_with_both_of_rgb_and_bin=False,
        logger=None,
    ):
        self.dir_in = dir_in
        self.dir_in_bin = dir_in_bin
        self.dir_out = dir_out
        self.dir_xmls = dir_xmls
        self.dir_models = dir_models
        self.tr_ocr = tr_ocr
        self.export_textline_images_and_text = export_textline_images_and_text
        self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour
        self.draw_texts_on_image = draw_texts_on_image
        self.dir_out_image_text = dir_out_image_text
        self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin
        if tr_ocr:
            self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
            self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
            self.model_ocr.to(self.device)
            if not batch_size:
                self.b_s = 2
            else:
                self.b_s = int(batch_size)

        else:
            self.model_ocr_dir = dir_models + "/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
            model_ocr = load_model(self.model_ocr_dir, compile=False)

            self.prediction_model = tf.keras.models.Model(
                model_ocr.get_layer(name="image").input,
                model_ocr.get_layer(name="dense2").output)
            if not batch_size:
                self.b_s = 8
            else:
                self.b_s = int(batch_size)

            with open(os.path.join(self.model_ocr_dir, "characters_org.txt"), "r") as config_file:
                characters = json.load(config_file)

            AUTOTUNE = tf.data.AUTOTUNE

            # Mapping characters to integers.
            char_to_num = StringLookup(vocabulary=list(characters), mask_token=None)

            # Mapping integers back to original characters.
            self.num_to_char = StringLookup(
                vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
            )

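    # The two StringLookup layers above form an invertible char<->id mapping.
    # A minimal round-trip sketch with a toy vocabulary (not the model's real
    # character set):
    #
    #   from tensorflow.keras.layers import StringLookup
    #   chars = ["a", "b", "c"]
    #   to_num = StringLookup(vocabulary=chars, mask_token=None)
    #   to_char = StringLookup(vocabulary=to_num.get_vocabulary(),
    #                          mask_token=None, invert=True)
    #   ids = to_num(["b", "a", "c"])   # [2, 1, 3]; index 0 is reserved for OOV
    #   back = to_char(ids)             # ["b", "a", "c"]
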
    def decode_batch_predictions(self, pred, max_len=128):
        # input_len holds, for each element of the batch,
        # the number of time steps of the prediction.
        input_len = np.ones(pred.shape[0]) * pred.shape[1]

        # Decode CTC predictions using greedy search (the default;
        # beam_width is only used when greedy=False).
        # decoded is a tuple with 2 elements.
        decoded = tf.keras.backend.ctc_decode(pred,
                                              input_length=input_len,
                                              beam_width=100)
        # The outputs are in the first element of the tuple.
        # Additionally, the first element is actually a list,
        # therefore we take the first element of that list as well.
        #print(decoded,'decoded')
        decoded = decoded[0][0][:, :max_len]

        #print(decoded, decoded.shape,'decoded')

        output = []
        for d in decoded:
            # Convert the predicted indices to the corresponding chars.
            d = tf.strings.reduce_join(self.num_to_char(d))
            d = d.numpy().decode("utf-8")
            output.append(d)
        return output

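    # Shape sketch for decode_batch_predictions (dummy logits, assumed sizes
    # only): a (batch, time, classes) softmax tensor decodes to `batch`
    # strings.
    #
    #   import numpy as np
    #   dummy = np.random.rand(2, 40, 97).astype("float32")  # 2 lines, 40 steps
    #   # texts = ocr.decode_batch_predictions(dummy)        # -> list of 2 str
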
    def distortion_free_resize(self, image, img_size):
        w, h = img_size
        image = tf.image.resize(image, size=(h, w), preserve_aspect_ratio=True)

        # Check the amount of padding needed.
        pad_height = h - tf.shape(image)[0]
        pad_width = w - tf.shape(image)[1]

        # Split the padding so that roughly the same amount goes on both sides.
        if pad_height % 2 != 0:
            height = pad_height // 2
            pad_height_top = height + 1
            pad_height_bottom = height
        else:
            pad_height_top = pad_height_bottom = pad_height // 2

        if pad_width % 2 != 0:
            width = pad_width // 2
            pad_width_left = width + 1
            pad_width_right = width
        else:
            pad_width_left = pad_width_right = pad_width // 2

        image = tf.pad(
            image,
            paddings=[
                [pad_height_top, pad_height_bottom],
                [pad_width_left, pad_width_right],
                [0, 0],
            ],
        )

        image = tf.transpose(image, (1, 0, 2))
        image = tf.image.flip_left_right(image)
        return image

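    # Padding split used above: an odd remainder puts the extra pixel on the
    # top/left side. E.g. fitting a line into a 32 px target height:
    #
    #   pad = 32 - 30   # 2 -> 1 px top, 1 px bottom
    #   pad = 32 - 27   # 5 -> 3 px top, 2 px bottom
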
    def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image):
        width = np.shape(textline_image)[1]
        height = np.shape(textline_image)[0]
        common_window = int(0.22 * width)

        width1 = int(width / 2. - common_window)
        width2 = int(width / 2. + common_window)

        img_sum = np.sum(textline_image[:, :, 0], axis=0)
        sum_smoothed = gaussian_filter1d(img_sum, 3)

        peaks_real, _ = find_peaks(sum_smoothed, height=0)

        if len(peaks_real) > 35:

            #peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
            argsort = np.argsort(sum_smoothed[peaks_real])[::-1]
            peaks_real_top_six = peaks_real[argsort[:6]]
            midpoint = textline_image.shape[1] / 2.
            arg_closest = np.argmin(np.abs(peaks_real_top_six - midpoint))

            #arg_max = np.argmax(sum_smoothed[peaks_real])

            peaks_final = peaks_real_top_six[arg_closest]#peaks_real[arg_max]

            return peaks_final
        else:
            return None

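    # The split-point search above is a projection profile: column-wise sums
    # of the first channel, smoothed, then peak-picked near the image
    # midpoint. Sketch on a synthetic profile (assumed single-channel
    # convention):
    #
    #   import numpy as np
    #   from scipy.ndimage import gaussian_filter1d
    #   from scipy.signal import find_peaks
    #   profile = np.sum(np.ones((32, 200)), axis=0)   # flat toy profile
    #   smooth = gaussian_filter1d(profile, 3)
    #   peaks, _ = find_peaks(smooth, height=0)        # none on a flat line
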
    # Function to fit text inside the given area
    def fit_text_single_line(self, draw, text, font_path, max_width, max_height):
        initial_font_size = 50
        font_size = initial_font_size
        while font_size > 10:  # Minimum font size
            font = ImageFont.truetype(font_path, font_size)
            text_bbox = draw.textbbox((0, 0), text, font=font)  # Get text bounding box
            text_width = text_bbox[2] - text_bbox[0]
            text_height = text_bbox[3] - text_bbox[1]

            if text_width <= max_width and text_height <= max_height:
                return font  # Return the best-fitting font

            font_size -= 2  # Reduce font size and retry

        return ImageFont.truetype(font_path, 10)  # Smallest font fallback

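    # fit_text_single_line() is a simple shrink-to-fit search. Hypothetical
    # call, assuming a PIL draw context and a local NotoSans-Regular.ttf:
    #
    #   from PIL import Image, ImageDraw
    #   im = Image.new("RGB", (400, 60), "white")
    #   d = ImageDraw.Draw(im)
    #   # font = ocr.fit_text_single_line(d, "Lorem ipsum",
    #   #                                 "NotoSans-Regular.ttf", 380, 24)
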
    def return_textlines_split_if_needed(self, textline_image, textline_image_bin):

        split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image)
        if split_point:
            image1 = textline_image[:, :split_point, :]# image.crop((0, 0, width2, height))
            image2 = textline_image[:, split_point:, :]#image.crop((width1, 0, width, height))
            if self.prediction_with_both_of_rgb_and_bin:
                image1_bin = textline_image_bin[:, :split_point, :]# image.crop((0, 0, width2, height))
                image2_bin = textline_image_bin[:, split_point:, :]#image.crop((width1, 0, width, height))
                return [image1, image2], [image1_bin, image2_bin]
            else:
                return [image1, image2], None
        else:
            return None, None

    def preprocess_and_resize_image_for_ocrcnn_model(self, img, image_height, image_width):
        ratio = image_height / float(img.shape[0])
        w_ratio = int(ratio * img.shape[1])

        if w_ratio <= image_width:
            width_new = w_ratio
        else:
            width_new = image_width

        if width_new == 0:
            width_new = img.shape[1]

        ##if width_new+32 >= image_width:
            ##width_new = width_new - 32

        ###patch_zero = np.zeros((32, 32, 3))#+255
        ###patch_zero[9:19,8:18,:] = 0

        img = resize_image(img, image_height, width_new)
        img_fin = np.ones((image_height, image_width, 3)) * 255
        ###img_fin[:,:32,:] = patch_zero[:,:,:]
        ###img_fin[:,32:32+width_new,:] = img[:,:,:]
        img_fin[:, :width_new, :] = img[:, :, :]
        img_fin = img_fin / 255.
        return img_fin

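    # preprocess_and_resize_image_for_ocrcnn_model() letterboxes a line image
    # onto a fixed canvas: scale to the target height, paste left-aligned,
    # pad the remainder with white, normalize to [0, 1]. Toy check (made-up
    # input size, hypothetical `ocr` instance):
    #
    #   import numpy as np
    #   line = (np.random.rand(20, 100, 3) * 255).astype(np.uint8)
    #   # out = ocr.preprocess_and_resize_image_for_ocrcnn_model(line, 32, 512)
    #   # out.shape == (32, 512, 3) and out.max() <= 1.0
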
    def run(self):
        ls_imgs = os.listdir(self.dir_in)

        if self.tr_ocr:
            tr_ocr_input_height_and_width = 384
            for ind_img in ls_imgs:
                file_name = Path(ind_img).stem
                dir_img = os.path.join(self.dir_in, ind_img)
                dir_xml = os.path.join(self.dir_xmls, file_name + '.xml')
                out_file_ocr = os.path.join(self.dir_out, file_name + '.xml')
                img = cv2.imread(dir_img)

                if self.draw_texts_on_image:
                    out_image_with_text = os.path.join(self.dir_out_image_text, file_name + '.png')
                    image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
                    draw = ImageDraw.Draw(image_text)
                    total_bb_coordinates = []

                ##file_name = Path(dir_xmls).stem
                tree1 = ET.parse(dir_xml, parser=ET.XMLParser(encoding="utf-8"))
                root1 = tree1.getroot()
                alltags = [elem.tag for elem in root1.iter()]
                link = alltags[0].split('}')[0] + '}'

                name_space = alltags[0].split('}')[0]
                name_space = name_space.split('{')[1]

                region_tags = np.unique([x for x in alltags if x.endswith('TextRegion')])

                cropped_lines = []
                cropped_lines_region_indexer = []
                cropped_lines_meging_indexing = []

                indexer_text_region = 0
                for nn in root1.iter(region_tags):
                    for child_textregion in nn:
                        if child_textregion.tag.endswith("TextLine"):

                            for child_textlines in child_textregion:
                                if child_textlines.tag.endswith("Coords"):
                                    cropped_lines_region_indexer.append(indexer_text_region)
                                    p_h = child_textlines.attrib['points'].split(' ')
                                    textline_coords = np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])
                                    x, y, w, h = cv2.boundingRect(textline_coords)

                                    if self.draw_texts_on_image:
                                        total_bb_coordinates.append([x, y, w, h])

                                    h2w_ratio = h / float(w)

                                    img_poly_on_img = np.copy(img)
                                    mask_poly = np.zeros(img.shape)
                                    mask_poly = cv2.fillPoly(mask_poly, pts=[textline_coords], color=(1, 1, 1))

                                    mask_poly = mask_poly[y:y+h, x:x+w, :]
                                    img_crop = img_poly_on_img[y:y+h, x:x+w, :]
                                    img_crop[mask_poly == 0] = 255

                                    if h2w_ratio > 0.1:
                                        cropped_lines.append(resize_image(img_crop, tr_ocr_input_height_and_width, tr_ocr_input_height_and_width))
                                        cropped_lines_meging_indexing.append(0)
                                    else:
                                        splited_images, _ = self.return_textlines_split_if_needed(img_crop, None)
                                        #print(splited_images)
                                        if splited_images:
                                            cropped_lines.append(resize_image(splited_images[0], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width))
                                            cropped_lines_meging_indexing.append(1)
                                            cropped_lines.append(resize_image(splited_images[1], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width))
                                            cropped_lines_meging_indexing.append(-1)
                                        else:
                                            cropped_lines.append(img_crop)
                                            cropped_lines_meging_indexing.append(0)
                    indexer_text_region = indexer_text_region + 1

                extracted_texts = []
                n_iterations = math.ceil(len(cropped_lines) / self.b_s)

                for i in range(n_iterations):
                    if i == (n_iterations - 1):
                        n_start = i * self.b_s
                        imgs = cropped_lines[n_start:]
                    else:
                        n_start = i * self.b_s
                        n_end = (i + 1) * self.b_s
                        imgs = cropped_lines[n_start:n_end]
                    pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values
                    generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device))
                    generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True)

                    extracted_texts = extracted_texts + generated_text_merged

                del cropped_lines
                gc.collect()

                extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind] == 0 else extracted_texts[ind] + " " + extracted_texts[ind + 1] if cropped_lines_meging_indexing[ind] == 1 else None for ind in range(len(cropped_lines_meging_indexing))]

                extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
                #print(extracted_texts_merged, len(extracted_texts_merged))

                unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)

                if self.draw_texts_on_image:

                    font_path = "NotoSans-Regular.ttf"  # Make sure this file exists!
                    font = ImageFont.truetype(font_path, 40)

                    for indexer_text, bb_ind in enumerate(total_bb_coordinates):

                        x_bb = bb_ind[0]
                        y_bb = bb_ind[1]
                        w_bb = bb_ind[2]
                        h_bb = bb_ind[3]

                        font = self.fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb * 0.4))

                        ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)

                        text_bbox = draw.textbbox((0, 0), extracted_texts_merged[indexer_text], font=font)
                        text_width = text_bbox[2] - text_bbox[0]
                        text_height = text_bbox[3] - text_bbox[1]

                        text_x = x_bb + (w_bb - text_width) // 2  # Center horizontally
                        text_y = y_bb + (h_bb - text_height) // 2  # Center vertically

                        # Draw the text
                        draw.text((text_x, text_y), extracted_texts_merged[indexer_text], fill="black", font=font)
                    image_text.save(out_image_with_text)

                #print(len(unique_cropped_lines_region_indexer), 'unique_cropped_lines_region_indexer')
                text_by_textregion = []
                for ind in unique_cropped_lines_region_indexer:
                    extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer) == ind]

                    text_by_textregion.append(" ".join(extracted_texts_merged_un))

                #print(len(text_by_textregion) , indexer_text_region, "text_by_textregion")

                #print(time.time() - t0 ,'elapsed time')

                indexer = 0
                indexer_textregion = 0
                for nn in root1.iter(region_tags):
                    text_subelement_textregion = ET.SubElement(nn, 'TextEquiv')
                    unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode')

                    has_textline = False
                    for child_textregion in nn:
                        if child_textregion.tag.endswith("TextLine"):
                            text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
                            unicode_textline = ET.SubElement(text_subelement, 'Unicode')
                            unicode_textline.text = extracted_texts_merged[indexer]
                            indexer = indexer + 1
                            has_textline = True
                    if has_textline:
                        unicode_textregion.text = text_by_textregion[indexer_textregion]
                    indexer_textregion = indexer_textregion + 1

                ET.register_namespace("", name_space)
                tree1.write(out_file_ocr, xml_declaration=True, method='xml', encoding="utf8", default_namespace=None)
                #print("Job done in %.1fs", time.time() - t0)
else:
|
|
max_len = 512
|
|
padding_token = 299
|
|
image_width = 512#max_len * 4
|
|
image_height = 32
|
|
|
|
|
|
img_size=(image_width, image_height)
|
|
|
|
for ind_img in ls_imgs:
|
|
file_name = Path(ind_img).stem
|
|
dir_img = os.path.join(self.dir_in, ind_img)
|
|
dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
|
|
out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
|
|
img = cv2.imread(dir_img)
|
|
if self.prediction_with_both_of_rgb_and_bin:
|
|
cropped_lines_bin = []
|
|
dir_img_bin = os.path.join(self.dir_in_bin, file_name+'.png')
|
|
img_bin = cv2.imread(dir_img_bin)
|
|
|
|
if self.draw_texts_on_image:
|
|
out_image_with_text = os.path.join(self.dir_out_image_text, file_name+'.png')
|
|
image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
|
|
draw = ImageDraw.Draw(image_text)
|
|
total_bb_coordinates = []
|
|
|
|
tree1 = ET.parse(dir_xml, parser = ET.XMLParser(encoding="utf-8"))
|
|
root1=tree1.getroot()
|
|
alltags=[elem.tag for elem in root1.iter()]
|
|
link=alltags[0].split('}')[0]+'}'
|
|
|
|
name_space = alltags[0].split('}')[0]
|
|
name_space = name_space.split('{')[1]
|
|
|
|
region_tags=np.unique([x for x in alltags if x.endswith('TextRegion')])
|
|
|
|
cropped_lines = []
|
|
cropped_lines_region_indexer = []
|
|
cropped_lines_meging_indexing = []
|
|
|
|
tinl = time.time()
|
|
indexer_text_region = 0
|
|
indexer_textlines = 0
|
|
for nn in root1.iter(region_tags):
|
|
for child_textregion in nn:
|
|
if child_textregion.tag.endswith("TextLine"):
|
|
for child_textlines in child_textregion:
|
|
if child_textlines.tag.endswith("Coords"):
|
|
cropped_lines_region_indexer.append(indexer_text_region)
|
|
p_h=child_textlines.attrib['points'].split(' ')
|
|
textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] )
|
|
|
|
x,y,w,h = cv2.boundingRect(textline_coords)
|
|
|
|
if self.draw_texts_on_image:
|
|
total_bb_coordinates.append([x,y,w,h])
|
|
|
|
w_scaled = w * image_height/float(h)
|
|
|
|
img_poly_on_img = np.copy(img)
|
|
if self.prediction_with_both_of_rgb_and_bin:
|
|
img_poly_on_img_bin = np.copy(img_bin)
|
|
img_crop_bin = img_poly_on_img_bin[y:y+h, x:x+w, :]
|
|
|
|
mask_poly = np.zeros(img.shape)
|
|
mask_poly = cv2.fillPoly(mask_poly, pts=[textline_coords], color=(1, 1, 1))
|
|
|
|
mask_poly = mask_poly[y:y+h, x:x+w, :]
|
|
img_crop = img_poly_on_img[y:y+h, x:x+w, :]
|
|
if not self.do_not_mask_with_textline_contour:
|
|
img_crop[mask_poly==0] = 255
|
|
if self.prediction_with_both_of_rgb_and_bin:
|
|
img_crop_bin[mask_poly==0] = 255
|
|
|
|
if not self.export_textline_images_and_text:
|
|
if w_scaled < 1.5*image_width:
|
|
img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
|
|
cropped_lines.append(img_fin)
|
|
cropped_lines_meging_indexing.append(0)
|
|
if self.prediction_with_both_of_rgb_and_bin:
|
|
img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width)
|
|
cropped_lines_bin.append(img_fin)
|
|
else:
|
|
if self.prediction_with_both_of_rgb_and_bin:
|
|
splited_images, splited_images_bin = self.return_textlines_split_if_needed(img_crop, img_crop_bin)
|
|
else:
|
|
splited_images, splited_images_bin = self.return_textlines_split_if_needed(img_crop, None)
|
|
if splited_images:
|
|
img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width)
|
|
cropped_lines.append(img_fin)
|
|
cropped_lines_meging_indexing.append(1)
|
|
img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width)
|
|
|
|
cropped_lines.append(img_fin)
|
|
cropped_lines_meging_indexing.append(-1)
|
|
|
|
if self.prediction_with_both_of_rgb_and_bin:
|
|
img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[0], image_height, image_width)
|
|
cropped_lines_bin.append(img_fin)
|
|
img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[1], image_height, image_width)
|
|
cropped_lines_bin.append(img_fin)
|
|
|
|
else:
|
|
img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
|
|
cropped_lines.append(img_fin)
|
|
cropped_lines_meging_indexing.append(0)
|
|
|
|
if self.prediction_with_both_of_rgb_and_bin:
|
|
img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width)
|
|
cropped_lines_bin.append(img_fin)
|
|
|
|
if self.export_textline_images_and_text:
|
|
if child_textlines.tag.endswith("TextEquiv"):
|
|
for cheild_text in child_textlines:
|
|
if cheild_text.tag.endswith("Unicode"):
|
|
textline_text = cheild_text.text
|
|
if textline_text:
|
|
with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.txt'), 'w') as text_file:
|
|
text_file.write(textline_text)
|
|
|
|
cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.png'), img_crop )
|
|
|
|
indexer_textlines+=1
|
|
|
|
if not self.export_textline_images_and_text:
|
|
indexer_text_region = indexer_text_region +1
|
|
|
|
        if not self.export_textline_images_and_text:
            extracted_texts = []

            n_iterations = math.ceil(len(cropped_lines) / self.b_s)

            for i in range(n_iterations):
                if i == (n_iterations - 1):
                    n_start = i * self.b_s
                    imgs = cropped_lines[n_start:]
                    imgs = np.array(imgs)
                    imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3)
                    if self.prediction_with_both_of_rgb_and_bin:
                        imgs_bin = cropped_lines_bin[n_start:]
                        imgs_bin = np.array(imgs_bin)
                        imgs_bin = imgs_bin.reshape(imgs_bin.shape[0], image_height, image_width, 3)
                else:
                    n_start = i * self.b_s
                    n_end = (i + 1) * self.b_s
                    imgs = cropped_lines[n_start:n_end]
                    imgs = np.array(imgs).reshape(self.b_s, image_height, image_width, 3)

                    if self.prediction_with_both_of_rgb_and_bin:
                        imgs_bin = cropped_lines_bin[n_start:n_end]
                        imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3)

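                # With both RGB and binarized inputs available, average the two
                # prediction tensors before decoding (a simple two-view ensemble).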
                preds = self.prediction_model.predict(imgs, verbose=0)
                if self.prediction_with_both_of_rgb_and_bin:
                    preds_bin = self.prediction_model.predict(imgs_bin, verbose=0)
                    preds = (preds + preds_bin) / 2.

                pred_texts = self.decode_batch_predictions(preds)

                for ib in range(imgs.shape[0]):
                    pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
                    extracted_texts.append(pred_texts_ib)

            del cropped_lines
            if self.prediction_with_both_of_rgb_and_bin:
                del cropped_lines_bin
            gc.collect()

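            # Stitch split lines back together: keep whole lines (index 0), join a
            # first half (1) with the following second half (-1), and drop the
            # second-half placeholders (None) afterwards, e.g. indexes [0, 1, -1]
            # with texts [a, b, c] -> [a, b + " " + c].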
            extracted_texts_merged = [extracted_texts[ind]
                                      if cropped_lines_meging_indexing[ind] == 0
                                      else extracted_texts[ind] + " " + extracted_texts[ind + 1]
                                      if cropped_lines_meging_indexing[ind] == 1
                                      else None
                                      for ind in range(len(cropped_lines_meging_indexing))]
            extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
            unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)

            if self.draw_texts_on_image:
                font_path = "NotoSans-Regular.ttf"  # Make sure this file exists!
                font = ImageFont.truetype(font_path, 40)

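                # For each line's bounding box, fit the font to the box width,
                # then centre the recognized text inside the box.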
                for indexer_text, bb_ind in enumerate(total_bb_coordinates):
                    x_bb = bb_ind[0]
                    y_bb = bb_ind[1]
                    w_bb = bb_ind[2]
                    h_bb = bb_ind[3]

                    font = self.fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb * 0.4))

                    ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)

                    text_bbox = draw.textbbox((0, 0), extracted_texts_merged[indexer_text], font=font)
                    text_width = text_bbox[2] - text_bbox[0]
                    text_height = text_bbox[3] - text_bbox[1]

                    text_x = x_bb + (w_bb - text_width) // 2  # Center horizontally
                    text_y = y_bb + (h_bb - text_height) // 2  # Center vertically

                    # Draw the text
                    draw.text((text_x, text_y), extracted_texts_merged[indexer_text], fill="black", font=font)

                image_text.save(out_image_with_text)

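            # Concatenate the line texts per region, in the order in which the
            # lines were collected for that region.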
            text_by_textregion = []
            for ind in unique_cropped_lines_region_indexer:
                extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer) == ind]
                # join with spaces so that words at line breaks stay separated
                text_by_textregion.append(" ".join(extracted_texts_merged_un))

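            # Write the recognized text back into the PAGE-XML tree: ensure every
            # TextLine and TextRegion carries a TextEquiv/Unicode element, creating
            # the elements where missing and overwriting the text where present.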
            indexer = 0
            indexer_textregion = 0
            for nn in root1.iter(region_tags):

                is_textregion_text = False
                for childtest in nn:
                    if childtest.tag.endswith("TextEquiv"):
                        is_textregion_text = True

                if not is_textregion_text:
                    text_subelement_textregion = ET.SubElement(nn, 'TextEquiv')
                    unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode')

                has_textline = False
                for child_textregion in nn:
                    if child_textregion.tag.endswith("TextLine"):

                        is_textline_text = False
                        for childtest2 in child_textregion:
                            if childtest2.tag.endswith("TextEquiv"):
                                is_textline_text = True

                        if not is_textline_text:
                            text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
                            unicode_textline = ET.SubElement(text_subelement, 'Unicode')
                            unicode_textline.text = extracted_texts_merged[indexer]
                        else:
                            for childtest3 in child_textregion:
                                if childtest3.tag.endswith("TextEquiv"):
                                    for child_uc in childtest3:
                                        if child_uc.tag.endswith("Unicode"):
                                            child_uc.text = extracted_texts_merged[indexer]

                        indexer = indexer + 1
                        has_textline = True

                if has_textline:
                    if is_textregion_text:
                        for child4 in nn:
                            if child4.tag.endswith("TextEquiv"):
                                for childtr_uc in child4:
                                    if childtr_uc.tag.endswith("Unicode"):
                                        childtr_uc.text = text_by_textregion[indexer_textregion]
                    else:
                        unicode_textregion.text = text_by_textregion[indexer_textregion]
                    indexer_textregion = indexer_textregion + 1

            ET.register_namespace("", name_space)
            tree1.write(out_file_ocr, xml_declaration=True, method='xml', encoding="utf8", default_namespace=None)
            #print("Job done in %.1fs", time.time() - t0)