mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-09-01 13:29:58 +02:00
1142 lines
52 KiB
Python
1142 lines
52 KiB
Python
"""
|
|
Machine-based reading order on layout, including the image enhancer helpers. The enhanced output can be written at the same scale as the input or at a newly predicted scale.
|
|
"""
|
|
|
|
from logging import Logger
|
|
from difflib import SequenceMatcher as sq
|
|
from PIL import Image, ImageDraw, ImageFont
|
|
import math
|
|
import os
|
|
import sys
|
|
import time
|
|
from typing import Optional
|
|
import atexit
|
|
import warnings
|
|
from functools import partial
|
|
from pathlib import Path
|
|
from multiprocessing import cpu_count
|
|
import gc
|
|
import copy
|
|
from loky import ProcessPoolExecutor
|
|
import xml.etree.ElementTree as ET
|
|
import cv2
|
|
import numpy as np
|
|
from ocrd import OcrdPage
|
|
from ocrd_utils import getLogger, tf_disable_interactive_logs
|
|
import statistics
|
|
from tensorflow.keras.models import load_model
|
|
from .utils.resize import resize_image
|
|
from .utils import (
|
|
crop_image_inside_box
|
|
)
|
|
|
|
from .utils.contour import (
|
|
filter_contours_area_of_image,
|
|
filter_contours_area_of_image_tables,
|
|
find_contours_mean_y_diff,
|
|
find_new_features_of_contours,
|
|
find_features_of_contours,
|
|
get_text_region_boxes_by_given_contours,
|
|
get_textregion_contours_in_org_image,
|
|
get_textregion_contours_in_org_image_light,
|
|
return_contours_of_image,
|
|
return_contours_of_interested_region,
|
|
return_contours_of_interested_region_by_min_size,
|
|
return_contours_of_interested_textline,
|
|
return_parent_contours,
|
|
)
|
|
|
|
# DPI value compared against in resize_and_enhance_image_with_column_classifier:
# images whose DPI is below it go through the resize/enhance path.
DPI_THRESHOLD = 298
|
|
# 5x5 all-ones uint8 structuring element, passed to cv2.dilate in
# early_page_for_num_of_column_classification.
KERNEL = np.ones((5, 5), np.uint8)
|
|
|
|
|
|
class machine_based_reading_order_on_layout:
|
|
def __init__(
    self,
    dir_models: str,
    dir_out: Optional[str] = None,
    logger: Optional[Logger] = None,
):
    """Create the predictor and load the reading-order model.

    Args:
        dir_models: directory that contains the
            ``model_eynollah_reading_order_20250824`` model folder.
        dir_out: directory used by ``reset_file_name_dir`` to build the
            output file name.
        logger: logger to use; when omitted, the ``'mbro on layout'``
            logger from ``ocrd_utils.getLogger`` is used.
    """
    # Imported locally because the module-level imports only pull
    # ``load_model`` out of tensorflow; without this the ``tf`` name below
    # is undefined and the GPU setup can never run.
    import tensorflow as tf

    self.dir_out = dir_out
    self.logger = logger if logger else getLogger('mbro on layout')

    # for parallelization of CPU-intensive tasks:
    self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200)
    atexit.register(self.executor.shutdown)

    self.dir_models = dir_models
    self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824"

    try:
        for device in tf.config.list_physical_devices('GPU'):
            tf.config.experimental.set_memory_growth(device, True)
    except Exception:
        # Best effort only: keep going on CPU, but do not swallow
        # KeyboardInterrupt / SystemExit like a bare ``except`` would.
        self.logger.warning("no GPU device available")

    self.model_reading_order = self.our_load_model(self.model_reading_order_dir)
    self.light_version = True
|
|
|
|
|
|
def cache_images(self, image_filename=None, image_pil=None, dpi=None):
    """Load the page image and cache its variants on ``self._imgs``.

    The cache holds four entries: ``img`` (3-channel BGR), ``img_grayscale``
    and the ``uint8`` casts of both (``img_uint8``, ``img_grayscale_uint8``).
    ``self.dpi`` is set to 100 in light mode and 0 otherwise, unless an
    explicit *dpi* is passed.

    Args:
        image_filename: path of the image to read with ``cv2.imread``.
        image_pil: PIL image used when no file name is given.
        dpi: optional DPI value overriding the default.

    Raises:
        ValueError: if neither input is given or the file cannot be read.
    """
    if image_filename:
        img = cv2.imread(image_filename)
        if img is None:
            # cv2.imread signals failure by returning None; fail here with a
            # clear message instead of inside cv2.cvtColor further down.
            raise ValueError(f"cannot read image file {image_filename!r}")
    elif image_pil is not None:
        # Convert to the same layout cv2.imread produces (3-channel BGR).
        # NOTE(review): replaces a call to ``pil2cv``, which is not among the
        # imports visible in this file - confirm this matches the intended helper.
        img = cv2.cvtColor(np.array(image_pil.convert('RGB')), cv2.COLOR_RGB2BGR)
    else:
        raise ValueError("either image_filename or image_pil must be given")

    # DPI detection (check_dpi) is disabled; fixed defaults are used instead.
    self.dpi = 100 if self.light_version else 0

    ret = {'img': img, 'img_grayscale': cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)}
    for prefix in ('', '_grayscale'):
        ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8)
    self._imgs = ret

    if dpi is not None:
        self.dpi = dpi
|
|
|
|
def reset_file_name_dir(self, image_filename):
    """Load *image_filename* into the image cache and derive the output path.

    Sets ``self.output_filename`` to ``<dir_out>/<stem of image_filename>.png``.
    ``self.dir_out`` must be a path here; ``os.path.join`` raises ``TypeError``
    if it is still ``None``.
    """
    self.cache_images(image_filename=image_filename)
    self.output_filename = os.path.join(self.dir_out, Path(image_filename).stem + '.png')
|
|
|
|
def imread(self, grayscale=False, uint8=True):
    """Return a copy of one of the image variants cached in ``self._imgs``.

    Args:
        grayscale: select the grayscale variant instead of the colour one.
        uint8: select the variant that was cast to ``uint8``.

    Returns:
        A fresh copy, so callers may modify it without touching the cache.
    """
    suffix = ('_grayscale' if grayscale else '') + ('_uint8' if uint8 else '')
    cached = self._imgs['img' + suffix]
    return cached.copy()
|
|
|
|
def isNaN(self, num):
    """Return the result of ``num != num``.

    For floats this is true only for NaN, the one value that does not
    compare equal to itself.
    """
    differs_from_itself = num != num
    return differs_from_itself
|
|
|
|
@staticmethod
def our_load_model(model_file):
    """Load a Keras model without compiling it.

    If *model_file* ends in ``.h5`` and a path with the same name minus the
    extension exists, that path is loaded instead (SavedModel is preferred
    over HDF5). When the plain load fails, it is retried with the
    ``PatchEncoder`` / ``Patches`` custom objects.

    Args:
        model_file: path of the model, as a string.

    Returns:
        The loaded model.
    """
    if model_file.endswith('.h5') and Path(model_file[:-3]).exists():
        # prefer SavedModel over HDF5 format if it exists
        model_file = model_file[:-3]
    try:
        model = load_model(model_file, compile=False)
    except Exception:
        # Narrowed from a bare ``except`` so Ctrl-C / SystemExit propagate.
        # NOTE(review): PatchEncoder and Patches are not among the imports
        # visible at the top of this file - confirm they are in scope,
        # otherwise this fallback raises NameError.
        model = load_model(model_file, compile=False, custom_objects={
            "PatchEncoder": PatchEncoder, "Patches": Patches})
    return model
|
|
|
|
def predict_enhancement(self, img):
    """Run ``self.model_enhancement`` over *img* tile by tile and stitch the result.

    The image is cut into overlapping tiles of the model's output size. From
    each predicted tile a margin (10% of the model width) is discarded on
    every side that does not lie on the outer border of the tiling, and the
    remainder is written into the output.

    Args:
        img: 3-channel image with values in 0..255. It is upscaled first if
            it is smaller than the model size in either dimension.

    Returns:
        Integer array of shape ``(height, width, 3)`` with the prediction
        scaled back to 0..255.
    """
    self.logger.debug("enter predict_enhancement")

    output_shape = self.model_enhancement.layers[-1].output_shape
    img_height_model = output_shape[1]
    img_width_model = output_shape[2]

    # cv2.resize expects dsize as (width, height): grow only the dimension
    # that is too small, up to the matching model dimension.
    if img.shape[0] < img_height_model:
        img = cv2.resize(img, (img.shape[1], img_height_model), interpolation=cv2.INTER_NEAREST)
    if img.shape[1] < img_width_model:
        img = cv2.resize(img, (img_width_model, img.shape[0]), interpolation=cv2.INTER_NEAREST)

    margin = int(0.1 * img_width_model)
    width_mid = img_width_model - 2 * margin
    height_mid = img_height_model - 2 * margin
    img = img / 255.
    img_h, img_w = img.shape[:2]

    prediction_true = np.zeros((img_h, img_w, 3))
    # number of tiles per axis, stepping by the tile size minus both margins
    nxf = math.ceil(img_w / float(width_mid))
    nyf = math.ceil(img_h / float(height_mid))

    for i in range(nxf):
        index_x_d = i * width_mid
        index_x_u = index_x_d + img_width_model
        if index_x_u > img_w:
            # last column of tiles: shift left so the tile ends at the border
            index_x_u = img_w
            index_x_d = img_w - img_width_model
        # keep the full tile edge only where it coincides with the image border
        left = 0 if i == 0 else margin
        right = 0 if i == nxf - 1 else margin

        for j in range(nyf):
            index_y_d = j * height_mid
            index_y_u = index_y_d + img_height_model
            if index_y_u > img_h:
                index_y_u = img_h
                index_y_d = img_h - img_height_model
            top = 0 if j == 0 else margin
            bottom = 0 if j == nyf - 1 else margin

            img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :]
            label_p_pred = self.model_enhancement.predict(img_patch, verbose=0)
            seg = label_p_pred[0, :, :, :] * 255

            # ``-x or None`` turns a zero margin into an open-ended slice
            prediction_true[index_y_d + top:index_y_u - bottom,
                            index_x_d + left:index_x_u - right] = \
                seg[top:-bottom or None,
                    left:-right or None]

    prediction_true = prediction_true.astype(int)
    return prediction_true
|
|
|
|
def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred):
    """Rescale *img* to a target width chosen from the number of columns.

    Target widths are 2000, 2400, 3000, 4000, 5000 and 6500 pixels for one to
    six columns; any other column count keeps *width_early*. The height
    follows from the aspect ratio. If the new height would reach 8000 pixels
    the image is returned unscaled.

    Args:
        img: image to rescale.
        num_col: number of columns.
        width_early: fallback width.
        label_p_pred: not used by this method.

    Returns:
        ``(image, num_column_is_classified)``; the flag is ``False`` when the
        image was only copied and ``True`` when it was resized.
    """
    self.logger.debug("enter calculate_width_height_by_columns")

    img_w_new = width_early
    for columns, target_width in ((1, 2000), (2, 2400), (3, 3000),
                                  (4, 4000), (5, 5000), (6, 6500)):
        if num_col == columns:
            img_w_new = target_width
            break

    img_h_new = img_w_new * img.shape[0] // img.shape[1]

    if img_h_new < 8000:
        return resize_image(img, img_h_new, img_w_new), True
    return np.copy(img), False
|
|
|
|
def early_page_for_num_of_column_classification(self,img_bin):
    """Crop the page area out of the image before column classification.

    The (blurred) image is run through ``self.model_page``; the bounding box
    of the largest contour of the dilated prediction is used for cropping.
    If no contour is found, the whole image is used.

    Args:
        img_bin: binarized image, used instead of the cached image when
            ``self.input_binary`` is set.

    Returns:
        ``(cropped_page, page_coord)`` as produced by ``crop_image_inside_box``.
    """
    self.logger.debug("enter early_page_for_num_of_column_classification")

    if self.input_binary:
        source = np.copy(img_bin).astype(np.uint8)
    else:
        source = self.imread()
    # NOTE(review): the blur is applied on both input paths - confirm against
    # the properly indented original.
    img = cv2.GaussianBlur(source, (5, 5), 0)
    img_page_prediction = self.do_prediction(False, img, self.model_page)

    imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(imgray, 0, 255, 0)
    thresh = cv2.dilate(thresh, KERNEL, iterations=3)
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    if len(contours) == 0:
        # nothing detected: fall back to the full image
        box = [0, 0, img.shape[1], img.shape[0]]
    else:
        areas = np.array([cv2.contourArea(contour) for contour in contours])
        largest = contours[np.argmax(areas)]
        box = cv2.boundingRect(largest)
    cropped_page, page_coord = crop_image_inside_box(box, img)

    self.logger.debug("exit early_page_for_num_of_column_classification")
    return cropped_page, page_coord
|
|
|
|
def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred):
    """Rescale a one- or two-column page to a small fixed width.

    The target width is 1000 pixels for one column and 1300 otherwise; the
    height follows from the aspect ratio. The image is left unscaled when
    the classifier confidence for *num_col* is below 0.9 while the target
    width is smaller than *width_early*, or when the new height would reach
    8000 pixels.

    Args:
        img: image to rescale.
        num_col: number of columns (1-based index into ``label_p_pred[0]``).
        width_early: width of the original image.
        label_p_pred: classifier output; ``label_p_pred[0]`` holds one
            confidence per column count.

    Returns:
        ``(image, num_column_is_classified)``; the flag is ``False`` when the
        image was only copied and ``True`` when it was resized.
    """
    # the message used to name the sibling method (copy-paste slip)
    self.logger.debug("enter calculate_width_height_by_columns_1_2")

    img_w_new = 1000 if num_col == 1 else 1300
    img_h_new = img_w_new * img.shape[0] // img.shape[1]

    low_confidence = label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early
    if low_confidence or img_h_new >= 8000:
        return np.copy(img), False
    return resize_image(img, img_h_new, img_w_new), True
|
|
|
|
def resize_and_enhance_image_with_column_classifier(self, light_version):
    """Determine the number of columns and rescale (and optionally enhance) the page.

    Steps: optionally binarize the cached image, crop the page, obtain the
    column count (from ``self.num_col_upper`` / ``self.num_col_lower`` and/or
    ``self.model_classifier``), then resize according to that count. In light
    mode the resized image is returned as is; otherwise it is passed through
    ``predict_enhancement``.

    Side effects: sets ``self.h_org``, ``self.w_org``,
    ``self.image_page_org_size`` and ``self.page_coord``.

    Args:
        light_version: skip the enhancement model and use the small target
            widths for one- and two-column pages.

    Returns:
        ``(is_image_enhanced, img, image_res, num_col,
        num_column_is_classified, img_bin)``.
    """
    self.logger.debug("enter resize_and_enhance_image_with_column_classifier")
    # DPI detection is switched off (was ``self.dpi``), so the comparison
    # against DPI_THRESHOLD below always selects the resize path.
    dpi = 0
    self.logger.info("Detected %s DPI", dpi)

    img = self.imread()
    # recorded for both input kinds (previously only for non-binary input)
    self.h_org, self.w_org = img.shape[:2]
    if self.input_binary:
        prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5)
        prediction_bin = 255 * (prediction_bin[:, :, 0] == 0)
        prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8)
        img = np.copy(prediction_bin)
        img_bin = prediction_bin
    else:
        img_bin = None

    width_early = img.shape[1]
    _, page_coord = self.early_page_for_num_of_column_classification(img_bin)

    self.image_page_org_size = img[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3], :]
    self.page_coord = page_coord

    def classify_columns():
        """Run the column classifier; return (probabilities, num_col, width)."""
        if self.input_binary:
            img_in = cv2.resize(img / 255.0, (448, 448), interpolation=cv2.INTER_NEAREST)
            img_in = img_in.reshape(1, 448, 448, 3)
            width = width_early
        else:
            img_1ch = self.imread(grayscale=True)
            width = img_1ch.shape[1]
            img_1ch = img_1ch[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]]
            img_1ch = cv2.resize(img_1ch / 255.0, (448, 448), interpolation=cv2.INTER_NEAREST)
            # replicate the single gray channel into three identical channels
            img_in = np.repeat(img_1ch[np.newaxis, :, :, np.newaxis], 3, axis=3)
        probabilities = self.model_classifier.predict(img_in, verbose=0)
        return probabilities, np.argmax(probabilities[0]) + 1, width

    upper = self.num_col_upper
    lower = self.num_col_lower
    if not upper and not lower:
        # no user constraint: trust the classifier
        label_p_pred, num_col, width_early = classify_columns()
    elif upper and lower and upper != lower:
        # classifier result, clamped into the user-given range
        label_p_pred, num_col, width_early = classify_columns()
        if num_col > upper:
            num_col = upper
            label_p_pred = [np.ones(6)]
        if num_col < lower:
            num_col = lower
            label_p_pred = [np.ones(6)]
    else:
        # only one bound given, or both bounds equal: take it as the answer
        num_col = upper if upper else lower
        label_p_pred = [np.ones(6)]

    self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5))

    if dpi < DPI_THRESHOLD:
        if light_version and num_col in (1, 2):
            img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(
                img, num_col, width_early, label_p_pred)
        else:
            img_new, num_column_is_classified = self.calculate_width_height_by_columns(
                img, num_col, width_early, label_p_pred)
        if light_version:
            image_res = np.copy(img_new)
        else:
            image_res = self.predict_enhancement(img_new)
        is_image_enhanced = True
    else:
        num_column_is_classified = True
        image_res = np.copy(img)
        is_image_enhanced = False

    self.logger.debug("exit resize_and_enhance_image_with_column_classifier")
    return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin
|
|
def read_xml(self, xml_file):
    """Parse a PAGE-XML file into region contours and a label image.

    Text regions are grouped by their ``type`` attribute: ``heading``,
    ``header`` and ``marginalia`` are kept separately, ``drop-capital`` and
    ``signature-mark`` are ignored, and every other text region counts as a
    paragraph. Image and separator regions are drawn into the label image
    only. Region outlines are read either from the ``points`` attribute of
    ``Coords`` or from child ``Point`` elements.

    Label values in the returned image: 1 paragraph, 2 heading/header,
    3 marginalia, 4 image, 5 separator.

    Args:
        xml_file: path of the PAGE-XML file.

    Returns:
        A tuple ``(tree, root, bb_coord_printspace, file_name, id_paragraph,
        id_header + id_heading, co_text_paragraph,
        co_text_header + co_text_heading, tot_region_ref, x_len, y_len,
        index_tot_regions, img_poly)`` where ``bb_coord_printspace`` is
        ``[x, y, w, h]`` of the PrintSpace (or else Border) element, or
        ``None`` if there is none.

    Raises:
        ValueError: if the file has no ``Page`` element to take the image
            size from.
    """
    file_name = Path(xml_file).stem
    tree1 = ET.parse(xml_file, parser=ET.XMLParser(encoding='utf-8'))
    root1 = tree1.getroot()
    alltags = [elem.tag for elem in root1.iter()]
    # namespace prefix "{...}", taken from the root element's tag
    link = alltags[0].split('}')[0] + '}'
    coords_tag = link + 'Coords'
    point_tag = link + 'Point'

    def region_polygon(region):
        """Collect the outline of *region*; return [] if it has none."""
        collected = []
        n_points = 0
        for node in region.iter():
            if node.tag == coords_tag and node.attrib:
                # "x1,y1 x2,y2 ..." - split() tolerates repeated whitespace
                pairs = node.attrib['points'].split()
                if pairs:
                    collected.append(np.array(
                        [[int(pair.split(',')[0]), int(pair.split(',')[1])] for pair in pairs]))
                break
            if node.tag == point_tag:
                # older format: one <Point x= y=> element per vertex
                collected.append([int(float(node.attrib['x'])), int(float(node.attrib['y']))])
                n_points += 1
            elif n_points >= 1:
                # first non-Point element after the vertex run ends the outline
                break
        return collected

    def polygons_of(tag):
        """Return the non-empty outlines of all elements named *tag*."""
        outlines = []
        for region in root1.iter(tag):
            outline = region_polygon(region)
            if outline:
                outlines.append(np.array(outline))
        return outlines

    x_len = y_len = None
    for page in root1.iter(link + 'Page'):
        y_len = int(page.attrib['imageHeight'])
        x_len = int(page.attrib['imageWidth'])
    if x_len is None:
        raise ValueError(f"no Page element found in {xml_file!r}")

    index_tot_regions = []
    tot_region_ref = []
    for ref in root1.iter(link + 'RegionRefIndexed'):
        index_tot_regions.append(ref.attrib['index'])
        tot_region_ref.append(ref.attrib['regionRef'])

    # Bounding box of the print space; Border is only used without PrintSpace.
    if link + 'PrintSpace' in alltags:
        frame_suffix = '}PrintSpace'
    elif link + 'Border' in alltags:
        frame_suffix = '}Border'
    else:
        frame_suffix = None

    bb_coord_printspace = None
    if frame_suffix is not None:
        co_printspace = []
        for tag in sorted({t for t in alltags if t.endswith(frame_suffix)}):
            co_printspace.extend(polygons_of(tag))

        img_printspace = np.zeros((y_len, x_len, 3))
        img_printspace = cv2.fillPoly(img_printspace, pts=co_printspace, color=(1, 1, 1))
        img_printspace = img_printspace.astype(np.uint8)

        imgray = cv2.cvtColor(img_printspace, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(imgray, 0, 255, 0)
        contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        if len(contours) > 0:
            cnt_size = np.array([cv2.contourArea(contour) for contour in contours])
            x, y, w, h = cv2.boundingRect(contours[np.argmax(cnt_size)])
            bb_coord_printspace = [x, y, w, h]

    co_text_paragraph = []
    co_text_heading = []
    co_text_header = []
    co_text_marginalia = []
    co_sep = []
    co_img = []
    id_paragraph = []
    id_header = []
    id_heading = []

    for tag in sorted({t for t in alltags if t.endswith('Region')}):
        if tag.endswith('}TextRegion'):
            for region in root1.iter(tag):
                outline = region_polygon(region)
                if not outline:
                    continue
                contour = np.array(outline)
                region_type = region.attrib.get('type')
                if region_type in ('drop-capital', 'signature-mark'):
                    # neither drawn nor returned
                    continue
                if region_type == 'heading':
                    co_text_heading.append(contour)
                    id_heading.append(region.attrib['id'])
                elif region_type == 'header':
                    co_text_header.append(contour)
                    id_header.append(region.attrib['id'])
                elif region_type == 'marginalia':
                    co_text_marginalia.append(contour)
                else:
                    # includes untyped regions, catch-words and page numbers
                    co_text_paragraph.append(contour)
                    id_paragraph.append(region.attrib['id'])
        elif tag.endswith('}ImageRegion'):
            co_img.extend(polygons_of(tag))
        elif tag.endswith('}SeparatorRegion'):
            co_sep.extend(polygons_of(tag))

    # Later fills overwrite earlier ones where regions overlap.
    img = np.zeros((y_len, x_len, 3))
    img_poly = cv2.fillPoly(img, pts=co_text_paragraph, color=(1, 1, 1))
    img_poly = cv2.fillPoly(img, pts=co_text_heading, color=(2, 2, 2))
    img_poly = cv2.fillPoly(img, pts=co_text_header, color=(2, 2, 2))
    img_poly = cv2.fillPoly(img, pts=co_text_marginalia, color=(3, 3, 3))
    img_poly = cv2.fillPoly(img, pts=co_img, color=(4, 4, 4))
    img_poly = cv2.fillPoly(img, pts=co_sep, color=(5, 5, 5))

    return tree1, root1, bb_coord_printspace, file_name, id_paragraph, id_header + id_heading, \
        co_text_paragraph, co_text_header + co_text_heading, \
        tot_region_ref, x_len, y_len, index_tot_regions, img_poly
|
|
|
|
def return_indexes_of_contours_loctaed_inside_another_list_of_contours(self, contours, contours_loc, cx_main_loc, cy_main_loc, indexes_loc):
    """For each contour, find which of the given centre points lie in or on it.

    Args:
        contours: enclosing contours to test against.
        contours_loc: not used by this method.
        cx_main_loc: x coordinates of the centre points.
        cy_main_loc: y coordinates of the centre points.
        indexes_loc: array of indexes, one per centre point.

    Returns:
        Three lists with one entry per contour: the entries of *indexes_loc*
        whose point is inside or on the contour, and the x and y coordinates
        of those points.
    """
    located_indexes = []
    located_cx = []
    located_cy = []
    cx_array = np.array(cx_main_loc)
    cy_array = np.array(cy_main_loc)

    for contour in contours:
        # pointPolygonTest without distance: positive inside, zero on the edge
        hits = np.array([
            cv2.pointPolygonTest(contour, (cx_main_loc[position], cy), False)
            for position, cy in enumerate(cy_main_loc)
        ])
        inside = np.where((hits == 0) | (hits == 1))

        located_indexes.append(indexes_loc[inside])
        located_cx.append(cx_array[inside])
        located_cy.append(cy_array[inside])

    return located_indexes, located_cx, located_cy
|
|
|
|
def do_order_of_regions_with_model(self, contours_only_text_parent, contours_only_text_parent_h, text_regions_p):
    """
    Predict the reading order of text regions with the pairwise reading-order model.

    Every region is rasterised into its own binary mask. Pairs of masks,
    together with the layout label map, are fed to ``self.model_reading_order``
    which decides whether the second region comes after the first. The regions
    are ordered by repeatedly partitioning a group around its first element
    (regions predicted before it / after it) until all groups have one element.

    When there are more than ``min_cont_size_to_be_dilated`` main regions and
    ``self.light_version`` is truthy, neighbouring regions are first merged by
    morphological erosion/dilation so that fewer pairs have to be compared; the
    merged groups are expanded back to the original region indices at the end.

    Args:
        contours_only_text_parent: list of contours of the main text regions
            (``run`` passes the paragraph contours here).
        contours_only_text_parent_h: list of contours of header regions; may be
            empty or falsy.
        text_regions_p: 2-D label map of the page. Label 1 is treated as text
            when building the merged mask and label 5 is marked in the model
            input.

    Returns:
        ``(order, region_ids)``. ``order`` lists indices into
        ``contours_only_text_parent + contours_only_text_parent_h`` in reading
        order and ``region_ids`` holds one ``'region_%04d'`` id per input
        region. Returns ``([], [])`` when there are no regions.
    """
    # Size (height, width) to which all model inputs are resized.
    input_height = 672  # 448
    input_width = 448  # 224
    inference_bs = 3
    min_cont_size_to_be_dilated = 10

    # Evaluated once; the inputs it depends on are not modified below.
    use_dilated = len(contours_only_text_parent) > min_cont_size_to_be_dilated and self.light_version

    if use_dilated:
        ver_kernel = np.ones((5, 1), dtype=np.uint8)
        hor_kernel = np.ones((1, 5), dtype=np.uint8)

        cx_conts, cy_conts, x_min_conts, x_max_conts, _, _, _ = find_new_features_of_contours(
            contours_only_text_parent)
        args_cont_located = np.array(range(len(contours_only_text_parent)))

        # Regions clearly wider than average (ratio >= 1.3) are kept out of the
        # merging step and handled as stand-alone regions.
        # NOTE(review): statistics.mean is pure Python; np.mean may have been
        # intended -- confirm numeric equivalence before switching.
        diff_x_conts = np.abs(x_max_conts[:] - x_min_conts)
        mean_x = statistics.mean(diff_x_conts)
        diff_x_ratio = diff_x_conts / mean_x

        # Two separate comparisons on purpose: a NaN ratio belongs to neither
        # group and is picked up by the "missing regions" fallback below.
        is_excluded = diff_x_ratio >= 1.3
        is_included = diff_x_ratio < 1.3

        args_cont_located_excluded = args_cont_located[is_excluded]
        args_cont_located_included = args_cont_located[is_included]

        contours_only_text_parent_included = [
            cont for cont, keep in zip(contours_only_text_parent, is_included) if keep]
        cx_conts_included = [cx for cx, keep in zip(cx_conts, is_included) if keep]
        cy_conts_included = [cy for cy, keep in zip(cy_conts, is_included) if keep]

        text_regions_p = text_regions_p.astype('uint8')

        if len(args_cont_located_excluded) > 0:
            # Draw only the regions that take part in merging.
            textregion_par = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1])).astype('uint8')
            textregion_par = cv2.fillPoly(textregion_par, pts=contours_only_text_parent_included, color=(1, 1))
        else:
            textregion_par = (text_regions_p[:, :] == 1) * 1
            textregion_par = textregion_par.astype('uint8')

        # Shrink horizontally and grow vertically, then drop every pixel that
        # carries a label greater than 1 in the layout map.
        dilated = cv2.erode(textregion_par, hor_kernel, iterations=2)
        dilated = cv2.dilate(dilated, ver_kernel, iterations=4)
        dilated = cv2.erode(dilated, hor_kernel, iterations=1)
        dilated = cv2.dilate(dilated, ver_kernel, iterations=5)
        dilated[text_regions_p[:, :] > 1] = 0

        contours_only_dilated, hir_on_text_dilated = return_contours_of_image(dilated)
        contours_only_dilated = return_parent_contours(contours_only_dilated, hir_on_text_dilated)

        indexes_of_located_cont, _, center_y_coordinates_of_located = \
            self.return_indexes_of_contours_loctaed_inside_another_list_of_contours(
                contours_only_dilated,
                contours_only_text_parent_included,
                cx_conts_included,
                cy_conts_included,
                args_cont_located_included)

        # Wide regions become their own single-member group.
        for ind in args_cont_located_excluded:
            indexes_of_located_cont.append(np.array([ind]))
            contours_only_dilated.append(contours_only_text_parent[ind])
            center_y_coordinates_of_located.append(0)

        # Regions that ended up in no group at all also get their own group.
        array_list = [np.array([elem]) if isinstance(elem, int) else elem for elem in indexes_of_located_cont]
        if array_list:
            flattened_array = np.concatenate([arr.ravel() for arr in array_list])
        else:
            # np.concatenate rejects an empty sequence; treat every region as missing.
            flattened_array = np.array([], dtype=int)

        missing_textregions = list(
            set(np.array(range(len(contours_only_text_parent)))) - set(np.unique(flattened_array)))

        for ind in missing_textregions:
            indexes_of_located_cont.append(np.array([ind]))
            contours_only_dilated.append(contours_only_text_parent[ind])
            center_y_coordinates_of_located.append(0)

        # Headers are never merged: they map to themselves, offset by the
        # number of main regions.
        if contours_only_text_parent_h:
            n_parent = len(contours_only_text_parent)
            indexes_of_located_cont.extend(range(n_parent, n_parent + len(contours_only_text_parent_h)))

    y_len = text_regions_p.shape[0]
    x_len = text_regions_p.shape[1]

    img_poly = np.copy(text_regions_p)
    img_header_and_sep = np.zeros((y_len, x_len), dtype='uint8')

    main_contours = contours_only_dilated if use_dilated else contours_only_text_parent
    if contours_only_text_parent_h:
        _, _, x_min_main, x_max_main, _, y_max_main, _ = find_new_features_of_contours(
            contours_only_text_parent_h)
        # Mark a 12 pixel high band directly below every header.
        for y_max, x_min, x_max in zip(y_max_main, x_min_main, x_max_main):
            img_header_and_sep[int(y_max):int(y_max) + 12, int(x_min):int(x_max)] = 1
        co_text_all_org = contours_only_text_parent + contours_only_text_parent_h
        co_text_all = main_contours + contours_only_text_parent_h
    else:
        co_text_all_org = contours_only_text_parent
        co_text_all = main_contours

    if not len(co_text_all):
        return [], []

    # One binary mask per region, rasterised at 1/6 of the page resolution.
    labels_con = np.zeros((int(y_len / 6.), int(x_len / 6.), len(co_text_all)), dtype=bool)
    co_text_all = [(cont / 6).astype(int) for cont in co_text_all]
    for idx, cont in enumerate(co_text_all):
        img = labels_con[:, :, idx].astype(np.uint8)
        cv2.fillPoly(img, pts=[cont], color=(1,))
        labels_con[:, :, idx] = img

    labels_con = resize_image(labels_con.astype(np.uint8), input_height, input_width).astype(bool)
    img_header_and_sep = resize_image(img_header_and_sep, input_height, input_width)
    img_poly = resize_image(img_poly, input_height, input_width)

    # Loop invariants of the pairwise comparison.
    label5_mask = img_poly == 5  # presumably separators -- confirm against read_xml
    header_band_mask = img_header_and_sep == 1
    layout_channel = img_poly / 5.

    def region_channel(index):
        # Region mask with label-5 pixels set to 2 and header bands to 3, scaled to [0, 1].
        channel = labels_con[:, :, index].astype(float)
        channel[label5_mask] = 2
        channel[header_band_mask] = 3
        return channel / 3.

    input_1 = np.zeros((inference_bs, input_height, input_width, 3))
    ordered = [list(range(len(co_text_all)))]
    index_update = 0
    while index_update >= 0:
        # Split the first group that still has more than one element around
        # its first member ``i``.
        ij_list = ordered.pop(index_update)
        i = ij_list.pop(0)

        ante_list = []
        post_list = []
        batch = []
        for tot_counter, j in enumerate(ij_list, start=1):
            slot = len(batch)
            # NOTE(review): the channel for ``i`` is rebuilt per pair rather
            # than hoisted, so labels_con is only indexed when there is at
            # least one comparison, exactly as in the original code.
            input_1[slot, :, :, 0] = region_channel(i)
            input_1[slot, :, :, 2] = region_channel(j)
            input_1[slot, :, :, 1] = layout_channel
            batch.append(j)

            if tot_counter % inference_bs == 0 or tot_counter == len(ij_list):
                # Rows beyond len(batch) may hold stale data; their predictions are ignored.
                y_pr = self.model_reading_order.predict(input_1, verbose=0)
                for jb, region in enumerate(batch):
                    if y_pr[jb][0] >= 0.5:
                        post_list.append(region)
                    else:
                        ante_list.append(region)
                batch = []

        if len(ante_list):
            ordered.insert(index_update, ante_list)
            index_update += 1
        ordered.insert(index_update, [i])
        if len(post_list):
            ordered.insert(index_update + 1, post_list)

        index_update = -1
        for index_next, group in enumerate(ordered):
            if len(group) > 1:
                index_update = index_next
                break

    ordered = [group[0] for group in ordered]
    region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))]

    if not use_dilated:
        return ordered, region_ids

    # Expand every merged group back to the original region indices; members
    # of one group are emitted sorted by their centre y coordinate.
    org_contours_indexes = []
    n_dilated = len(contours_only_dilated)
    for region_with_curr_order in ordered:
        members = indexes_of_located_cont[region_with_curr_order]
        if region_with_curr_order >= n_dilated or np.isscalar(members):
            org_contours_indexes.append(members)
        else:
            arg_sort_located_cont = np.argsort(center_y_coordinates_of_located[region_with_curr_order])
            org_contours_indexes.extend(np.array(members)[arg_sort_located_cont])

    return org_contours_indexes, region_ids
|
|
|
|
|
|
|
|
|
|
def run(self, xml_filename : Optional[str] = None, dir_in : Optional[str] = None, overwrite : bool = False):
    """
    Recompute the reading order of PAGE-XML files and write them to ``self.dir_out``.

    For every input file the regions are read with ``self.read_xml``, ordered
    with ``self.do_order_of_regions_with_model``, any existing ``ReadingOrder``
    element is replaced by a new one, and the tree is written to
    ``<dir_out>/<file_name>.xml``.

    Args:
        xml_filename: path of a single XML file; used when ``dir_in`` is not given.
        dir_in: directory whose entries are all processed; takes precedence
            over ``xml_filename``.
        overwrite: NOTE(review): accepted but not consulted here -- an existing
            output file is always replaced. Confirm the intended semantics.

    Raises:
        ValueError: if neither ``xml_filename`` nor ``dir_in`` is given, or if
            no output directory was configured.
    """
    self.logger.debug("enter run")

    if dir_in:
        # os.listdir returns entries in arbitrary order; sort for reproducible runs.
        self.ls_xmls = sorted(os.listdir(dir_in))
    elif xml_filename:
        self.ls_xmls = [xml_filename]
    else:
        raise ValueError("run requires either a single XML filename or a directory")

    if self.dir_out is None:
        # Fail before any inference instead of in os.path.join after the first page.
        raise ValueError("run requires an output directory (dir_out)")

    for xml_name in self.ls_xmls:
        self.logger.info(xml_name)
        xml_file = os.path.join(dir_in, xml_name) if dir_in else xml_name

        (tree_xml, root_xml, _bb_coord_printspace, file_name,
         id_paragraph, id_header, co_text_paragraph, co_text_header,
         _tot_region_ref, _x_len, _y_len, _index_tot_regions, img_poly) = self.read_xml(xml_file)

        id_all_text = id_paragraph + id_header
        order_text_new, _ = self.do_order_of_regions_with_model(
            co_text_paragraph, co_text_header, img_poly[:, :, 0])
        # Region ids rearranged into the predicted reading order.
        id_all_text = np.array(id_all_text)[order_text_new]

        alltags = [elem.tag for elem in root_xml.iter()]

        # The root tag has the form '{namespace}Name'.
        link = alltags[0].split('}')[0] + '}'
        name_space = alltags[0].split('}')[0].split('{')[1]

        page_element = root_xml.find(link + 'Page')

        old_ro = root_xml.find(".//{*}ReadingOrder")
        if old_ro is not None:
            page_element.remove(old_ro)

        ro_subelement = ET.Element('ReadingOrder')
        ro_subelement2 = ET.SubElement(ro_subelement, 'OrderedGroup')
        ro_subelement2.set('id', "ro357564684568544579089")

        for index, id_text in enumerate(id_all_text):
            new_element_2 = ET.SubElement(ro_subelement2, 'RegionRefIndexed')
            new_element_2.set('regionRef', str(id_text))
            new_element_2.set('index', str(index))

        # Insert the reading order after PrintSpace/Border when the document
        # contains one of them, otherwise as the first child of Page.
        if (link + 'PrintSpace' in alltags) or (link + 'Border' in alltags):
            page_element.insert(1, ro_subelement)
        else:
            page_element.insert(0, ro_subelement)

        # Serialise the document namespace as the default (prefix-less) namespace.
        ET.register_namespace("", name_space)
        tree_xml.write(os.path.join(self.dir_out, file_name+'.xml'),xml_declaration=True,method='xml',encoding="utf8",default_namespace=None)
|
|
|