mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-08 11:49:55 +02:00
do_work_of_slopes_new*, do_back_rotation_and_get_cnt_back, do_work_of_contours_in_image: use mp.Pool, simplify
This commit is contained in:
parent
25e967397d
commit
68456ea002
3 changed files with 324 additions and 513 deletions
|
@ -11,8 +11,9 @@ import os
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import warnings
|
import warnings
|
||||||
|
from functools import partial
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from multiprocessing import Process, Queue, cpu_count
|
from multiprocessing import Pool, cpu_count
|
||||||
import gc
|
import gc
|
||||||
from ocrd_utils import getLogger
|
from ocrd_utils import getLogger
|
||||||
import cv2
|
import cv2
|
||||||
|
@ -60,14 +61,20 @@ from .utils.contour import (
|
||||||
from .utils.rotate import (
|
from .utils.rotate import (
|
||||||
rotate_image,
|
rotate_image,
|
||||||
rotation_not_90_func,
|
rotation_not_90_func,
|
||||||
rotation_not_90_func_full_layout)
|
rotation_not_90_func_full_layout
|
||||||
|
)
|
||||||
from .utils.separate_lines import (
|
from .utils.separate_lines import (
|
||||||
textline_contours_postprocessing,
|
textline_contours_postprocessing,
|
||||||
separate_lines_new2,
|
separate_lines_new2,
|
||||||
return_deskew_slop)
|
return_deskew_slop,
|
||||||
|
do_work_of_slopes_new,
|
||||||
|
do_work_of_slopes_new_curved,
|
||||||
|
do_work_of_slopes_new_light,
|
||||||
|
)
|
||||||
from .utils.drop_capitals import (
|
from .utils.drop_capitals import (
|
||||||
adhere_drop_capital_region_into_corresponding_textline,
|
adhere_drop_capital_region_into_corresponding_textline,
|
||||||
filter_small_drop_capitals_from_no_patch_layout)
|
filter_small_drop_capitals_from_no_patch_layout
|
||||||
|
)
|
||||||
from .utils.marginals import get_marginals
|
from .utils.marginals import get_marginals
|
||||||
from .utils.resize import resize_image
|
from .utils.resize import resize_image
|
||||||
from .utils import (
|
from .utils import (
|
||||||
|
@ -82,7 +89,8 @@ from .utils import (
|
||||||
small_textlines_to_parent_adherence2,
|
small_textlines_to_parent_adherence2,
|
||||||
order_of_regions,
|
order_of_regions,
|
||||||
find_number_of_columns_in_document,
|
find_number_of_columns_in_document,
|
||||||
return_boxes_of_images_by_order_of_reading_new)
|
return_boxes_of_images_by_order_of_reading_new
|
||||||
|
)
|
||||||
from .utils.pil_cv2 import check_dpi, pil2cv
|
from .utils.pil_cv2 import check_dpi, pil2cv
|
||||||
from .utils.xml import order_and_id_of_texts
|
from .utils.xml import order_and_id_of_texts
|
||||||
from .plot import EynollahPlotter
|
from .plot import EynollahPlotter
|
||||||
|
@ -1504,381 +1512,73 @@ class Eynollah:
|
||||||
|
|
||||||
all_box_coord.append(crop_coor)
|
all_box_coord.append(crop_coor)
|
||||||
|
|
||||||
return slopes, all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par)))
|
return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes
|
||||||
|
|
||||||
def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
|
def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
|
||||||
self.logger.debug("enter get_slopes_and_deskew_new")
|
if not len(contours):
|
||||||
|
return [], [], [], [], [], [], []
|
||||||
|
self.logger.debug("enter get_slopes_and_deskew_new_light")
|
||||||
if len(contours)>15:
|
if len(contours)>15:
|
||||||
num_cores = cpu_count()
|
num_cores = cpu_count()
|
||||||
else:
|
else:
|
||||||
num_cores = 1
|
num_cores = 1
|
||||||
queue_of_all_params = Queue()
|
with Pool(processes=num_cores) as pool:
|
||||||
|
results = pool.starmap(
|
||||||
processes = []
|
partial(do_work_of_slopes_new_light,
|
||||||
nh = np.linspace(0, len(boxes), num_cores + 1)
|
textline_mask_tot_ea=textline_mask_tot,
|
||||||
indexes_by_text_con = np.array(range(len(contours_par)))
|
image_page_rotated=image_page_rotated,
|
||||||
for i in range(num_cores):
|
slope_deskew=slope_deskew,
|
||||||
boxes_per_process = boxes[int(nh[i]) : int(nh[i + 1])]
|
logger=self.logger,
|
||||||
contours_per_process = contours[int(nh[i]) : int(nh[i + 1])]
|
MAX_SLOPE=MAX_SLOPE,
|
||||||
contours_par_per_process = contours_par[int(nh[i]) : int(nh[i + 1])]
|
KERNEL=KERNEL,
|
||||||
indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])]
|
plotter=self.plotter,),
|
||||||
|
zip(boxes, contours, contours_par, range(len(contours_par))))
|
||||||
processes.append(Process(target=self.do_work_of_slopes_new_light, args=(queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, contours_par_per_process, indexes_text_con_per_process, image_page_rotated, slope_deskew)))
|
#textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results)
|
||||||
for i in range(num_cores):
|
self.logger.debug("exit get_slopes_and_deskew_new_light")
|
||||||
processes[i].start()
|
return tuple(zip(*results))
|
||||||
|
|
||||||
slopes = []
|
|
||||||
all_found_textline_polygons = []
|
|
||||||
all_found_text_regions = []
|
|
||||||
all_found_text_regions_par = []
|
|
||||||
boxes = []
|
|
||||||
all_box_coord = []
|
|
||||||
all_index_text_con = []
|
|
||||||
for i in range(num_cores):
|
|
||||||
list_all_par = queue_of_all_params.get(True)
|
|
||||||
slopes_for_sub_process = list_all_par[0]
|
|
||||||
polys_for_sub_process = list_all_par[1]
|
|
||||||
boxes_for_sub_process = list_all_par[2]
|
|
||||||
contours_for_subprocess = list_all_par[3]
|
|
||||||
contours_par_for_subprocess = list_all_par[4]
|
|
||||||
boxes_coord_for_subprocess = list_all_par[5]
|
|
||||||
indexes_for_subprocess = list_all_par[6]
|
|
||||||
for j in range(len(slopes_for_sub_process)):
|
|
||||||
slopes.append(slopes_for_sub_process[j])
|
|
||||||
all_found_textline_polygons.append(polys_for_sub_process[j])
|
|
||||||
boxes.append(boxes_for_sub_process[j])
|
|
||||||
all_found_text_regions.append(contours_for_subprocess[j])
|
|
||||||
all_found_text_regions_par.append(contours_par_for_subprocess[j])
|
|
||||||
all_box_coord.append(boxes_coord_for_subprocess[j])
|
|
||||||
all_index_text_con.append(indexes_for_subprocess[j])
|
|
||||||
for i in range(num_cores):
|
|
||||||
processes[i].join()
|
|
||||||
self.logger.debug('slopes %s', slopes)
|
|
||||||
self.logger.debug("exit get_slopes_and_deskew_new")
|
|
||||||
return slopes, all_found_textline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con
|
|
||||||
|
|
||||||
def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
|
def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
|
||||||
|
if not len(contours):
|
||||||
|
return [], [], [], [], [], [], []
|
||||||
self.logger.debug("enter get_slopes_and_deskew_new")
|
self.logger.debug("enter get_slopes_and_deskew_new")
|
||||||
num_cores = cpu_count()
|
num_cores = cpu_count()
|
||||||
queue_of_all_params = Queue()
|
with Pool(processes=num_cores) as pool:
|
||||||
|
results = pool.starmap(
|
||||||
processes = []
|
partial(do_work_of_slopes_new,
|
||||||
nh = np.linspace(0, len(boxes), num_cores + 1)
|
textline_mask_tot_ea=textline_mask_tot,
|
||||||
indexes_by_text_con = np.array(range(len(contours_par)))
|
image_page_rotated=image_page_rotated,
|
||||||
for i in range(num_cores):
|
slope_deskew=slope_deskew,
|
||||||
boxes_per_process = boxes[int(nh[i]) : int(nh[i + 1])]
|
logger=self.logger,
|
||||||
contours_per_process = contours[int(nh[i]) : int(nh[i + 1])]
|
MAX_SLOPE=MAX_SLOPE,
|
||||||
contours_par_per_process = contours_par[int(nh[i]) : int(nh[i + 1])]
|
KERNEL=KERNEL,
|
||||||
indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])]
|
plotter=self.plotter,),
|
||||||
|
zip(boxes, contours, contours_par, range(len(contours_par))))
|
||||||
processes.append(Process(target=self.do_work_of_slopes_new, args=(queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, contours_par_per_process, indexes_text_con_per_process, image_page_rotated, slope_deskew)))
|
#textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results)
|
||||||
for i in range(num_cores):
|
|
||||||
processes[i].start()
|
|
||||||
|
|
||||||
slopes = []
|
|
||||||
all_found_textline_polygons = []
|
|
||||||
all_found_text_regions = []
|
|
||||||
all_found_text_regions_par = []
|
|
||||||
boxes = []
|
|
||||||
all_box_coord = []
|
|
||||||
all_index_text_con = []
|
|
||||||
for i in range(num_cores):
|
|
||||||
list_all_par = queue_of_all_params.get(True)
|
|
||||||
slopes_for_sub_process = list_all_par[0]
|
|
||||||
polys_for_sub_process = list_all_par[1]
|
|
||||||
boxes_for_sub_process = list_all_par[2]
|
|
||||||
contours_for_subprocess = list_all_par[3]
|
|
||||||
contours_par_for_subprocess = list_all_par[4]
|
|
||||||
boxes_coord_for_subprocess = list_all_par[5]
|
|
||||||
indexes_for_subprocess = list_all_par[6]
|
|
||||||
for j in range(len(slopes_for_sub_process)):
|
|
||||||
slopes.append(slopes_for_sub_process[j])
|
|
||||||
all_found_textline_polygons.append(polys_for_sub_process[j])
|
|
||||||
boxes.append(boxes_for_sub_process[j])
|
|
||||||
all_found_text_regions.append(contours_for_subprocess[j])
|
|
||||||
all_found_text_regions_par.append(contours_par_for_subprocess[j])
|
|
||||||
all_box_coord.append(boxes_coord_for_subprocess[j])
|
|
||||||
all_index_text_con.append(indexes_for_subprocess[j])
|
|
||||||
for i in range(num_cores):
|
|
||||||
processes[i].join()
|
|
||||||
self.logger.debug('slopes %s', slopes)
|
|
||||||
self.logger.debug("exit get_slopes_and_deskew_new")
|
self.logger.debug("exit get_slopes_and_deskew_new")
|
||||||
return slopes, all_found_textline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con
|
return tuple(zip(*results))
|
||||||
|
|
||||||
def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew):
|
def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew):
|
||||||
|
if not len(contours):
|
||||||
|
return [], [], [], [], [], [], []
|
||||||
self.logger.debug("enter get_slopes_and_deskew_new_curved")
|
self.logger.debug("enter get_slopes_and_deskew_new_curved")
|
||||||
num_cores = cpu_count()
|
num_cores = cpu_count()
|
||||||
queue_of_all_params = Queue()
|
with Pool(processes=num_cores) as pool:
|
||||||
|
results = pool.starmap(
|
||||||
processes = []
|
partial(do_work_of_slopes_new_curved,
|
||||||
nh = np.linspace(0, len(boxes), num_cores + 1)
|
textline_mask_tot_ea=textline_mask_tot,
|
||||||
indexes_by_text_con = np.array(range(len(contours_par)))
|
image_page_rotated=image_page_rotated,
|
||||||
|
mask_texts_only=mask_texts_only,
|
||||||
for i in range(num_cores):
|
num_col=num_col,
|
||||||
boxes_per_process = boxes[int(nh[i]) : int(nh[i + 1])]
|
scale_par=scale_par,
|
||||||
contours_per_process = contours[int(nh[i]) : int(nh[i + 1])]
|
slope_deskew=slope_deskew,
|
||||||
contours_par_per_process = contours_par[int(nh[i]) : int(nh[i + 1])]
|
logger=self.logger,
|
||||||
indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])]
|
MAX_SLOPE=MAX_SLOPE,
|
||||||
|
KERNEL=KERNEL,
|
||||||
processes.append(Process(target=self.do_work_of_slopes_new_curved, args=(queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, contours_par_per_process, image_page_rotated, mask_texts_only, num_col, scale_par, indexes_text_con_per_process, slope_deskew)))
|
plotter=self.plotter,),
|
||||||
|
zip(boxes, contours, contours_par, range(len(contours_par))))
|
||||||
for i in range(num_cores):
|
#textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results)
|
||||||
processes[i].start()
|
self.logger.debug("exit get_slopes_and_deskew_new_curved")
|
||||||
|
return tuple(zip(*results))
|
||||||
slopes = []
|
|
||||||
all_found_textline_polygons = []
|
|
||||||
all_found_text_regions = []
|
|
||||||
all_found_text_regions_par = []
|
|
||||||
boxes = []
|
|
||||||
all_box_coord = []
|
|
||||||
all_index_text_con = []
|
|
||||||
|
|
||||||
for i in range(num_cores):
|
|
||||||
list_all_par = queue_of_all_params.get(True)
|
|
||||||
polys_for_sub_process = list_all_par[0]
|
|
||||||
boxes_for_sub_process = list_all_par[1]
|
|
||||||
contours_for_subprocess = list_all_par[2]
|
|
||||||
contours_par_for_subprocess = list_all_par[3]
|
|
||||||
boxes_coord_for_subprocess = list_all_par[4]
|
|
||||||
indexes_for_subprocess = list_all_par[5]
|
|
||||||
slopes_for_sub_process = list_all_par[6]
|
|
||||||
for j in range(len(polys_for_sub_process)):
|
|
||||||
slopes.append(slopes_for_sub_process[j])
|
|
||||||
all_found_textline_polygons.append(polys_for_sub_process[j][::-1])
|
|
||||||
boxes.append(boxes_for_sub_process[j])
|
|
||||||
all_found_text_regions.append(contours_for_subprocess[j])
|
|
||||||
all_found_text_regions_par.append(contours_par_for_subprocess[j])
|
|
||||||
all_box_coord.append(boxes_coord_for_subprocess[j])
|
|
||||||
all_index_text_con.append(indexes_for_subprocess[j])
|
|
||||||
|
|
||||||
for i in range(num_cores):
|
|
||||||
processes[i].join()
|
|
||||||
# print(slopes,'slopes')
|
|
||||||
return all_found_textline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con, slopes
|
|
||||||
|
|
||||||
def do_work_of_slopes_new_curved(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, image_page_rotated, mask_texts_only, num_col, scale_par, indexes_r_con_per_pro, slope_deskew):
|
|
||||||
self.logger.debug("enter do_work_of_slopes_new_curved")
|
|
||||||
slopes_per_each_subprocess = []
|
|
||||||
bounding_box_of_textregion_per_each_subprocess = []
|
|
||||||
textlines_rectangles_per_each_subprocess = []
|
|
||||||
contours_textregion_per_each_subprocess = []
|
|
||||||
contours_textregion_par_per_each_subprocess = []
|
|
||||||
all_box_coord_per_process = []
|
|
||||||
index_by_text_region_contours = []
|
|
||||||
|
|
||||||
textline_cnt_separated = np.zeros(textline_mask_tot_ea.shape)
|
|
||||||
|
|
||||||
for mv in range(len(boxes_text)):
|
|
||||||
|
|
||||||
all_text_region_raw = textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]
|
|
||||||
all_text_region_raw = all_text_region_raw.astype(np.uint8)
|
|
||||||
img_int_p = all_text_region_raw[:, :]
|
|
||||||
|
|
||||||
# img_int_p=cv2.erode(img_int_p,KERNEL,iterations = 2)
|
|
||||||
# plt.imshow(img_int_p)
|
|
||||||
# plt.show()
|
|
||||||
|
|
||||||
if img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
|
|
||||||
slopes_per_each_subprocess.append(0)
|
|
||||||
slope_for_all = [slope_deskew][0]
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
textline_con, hierarchy = return_contours_of_image(img_int_p)
|
|
||||||
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.0008)
|
|
||||||
y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
|
|
||||||
if self.isNaN(y_diff_mean):
|
|
||||||
slope_for_all = MAX_SLOPE
|
|
||||||
else:
|
|
||||||
sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0)))
|
|
||||||
img_int_p[img_int_p > 0] = 1
|
|
||||||
slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=self.plotter)
|
|
||||||
|
|
||||||
if abs(slope_for_all) < 0.5:
|
|
||||||
slope_for_all = [slope_deskew][0]
|
|
||||||
|
|
||||||
except Exception as why:
|
|
||||||
self.logger.error(why)
|
|
||||||
slope_for_all = MAX_SLOPE
|
|
||||||
|
|
||||||
if slope_for_all == MAX_SLOPE:
|
|
||||||
slope_for_all = [slope_deskew][0]
|
|
||||||
slopes_per_each_subprocess.append(slope_for_all)
|
|
||||||
|
|
||||||
index_by_text_region_contours.append(indexes_r_con_per_pro[mv])
|
|
||||||
_, crop_coor = crop_image_inside_box(boxes_text[mv], image_page_rotated)
|
|
||||||
|
|
||||||
if abs(slope_for_all) < 45:
|
|
||||||
# all_box_coord.append(crop_coor)
|
|
||||||
textline_region_in_image = np.zeros(textline_mask_tot_ea.shape)
|
|
||||||
cnt_o_t_max = contours_par_per_process[mv]
|
|
||||||
x, y, w, h = cv2.boundingRect(cnt_o_t_max)
|
|
||||||
mask_biggest = np.zeros(mask_texts_only.shape)
|
|
||||||
mask_biggest = cv2.fillPoly(mask_biggest, pts=[cnt_o_t_max], color=(1, 1, 1))
|
|
||||||
mask_region_in_patch_region = mask_biggest[y : y + h, x : x + w]
|
|
||||||
textline_biggest_region = mask_biggest * textline_mask_tot_ea
|
|
||||||
|
|
||||||
# print(slope_for_all,'slope_for_all')
|
|
||||||
textline_rotated_separated = separate_lines_new2(textline_biggest_region[y : y + h, x : x + w], 0, num_col, slope_for_all, plotter=self.plotter)
|
|
||||||
|
|
||||||
# new line added
|
|
||||||
##print(np.shape(textline_rotated_separated),np.shape(mask_biggest))
|
|
||||||
textline_rotated_separated[mask_region_in_patch_region[:, :] != 1] = 0
|
|
||||||
# till here
|
|
||||||
|
|
||||||
textline_cnt_separated[y : y + h, x : x + w] = textline_rotated_separated
|
|
||||||
textline_region_in_image[y : y + h, x : x + w] = textline_rotated_separated
|
|
||||||
|
|
||||||
# plt.imshow(textline_region_in_image)
|
|
||||||
# plt.show()
|
|
||||||
# plt.imshow(textline_cnt_separated)
|
|
||||||
# plt.show()
|
|
||||||
|
|
||||||
pixel_img = 1
|
|
||||||
cnt_textlines_in_image = return_contours_of_interested_textline(textline_region_in_image, pixel_img)
|
|
||||||
|
|
||||||
textlines_cnt_per_region = []
|
|
||||||
for jjjj in range(len(cnt_textlines_in_image)):
|
|
||||||
mask_biggest2 = np.zeros(mask_texts_only.shape)
|
|
||||||
mask_biggest2 = cv2.fillPoly(mask_biggest2, pts=[cnt_textlines_in_image[jjjj]], color=(1, 1, 1))
|
|
||||||
if num_col + 1 == 1:
|
|
||||||
mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=5)
|
|
||||||
else:
|
|
||||||
mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4)
|
|
||||||
|
|
||||||
pixel_img = 1
|
|
||||||
mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par))
|
|
||||||
cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img)
|
|
||||||
try:
|
|
||||||
textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0])
|
|
||||||
except Exception as why:
|
|
||||||
self.logger.error(why)
|
|
||||||
else:
|
|
||||||
add_boxes_coor_into_textlines = True
|
|
||||||
textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv], add_boxes_coor_into_textlines)
|
|
||||||
add_boxes_coor_into_textlines = False
|
|
||||||
# print(np.shape(textlines_cnt_per_region),'textlines_cnt_per_region')
|
|
||||||
|
|
||||||
textlines_rectangles_per_each_subprocess.append(textlines_cnt_per_region)
|
|
||||||
bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv])
|
|
||||||
contours_textregion_per_each_subprocess.append(contours_per_process[mv])
|
|
||||||
contours_textregion_par_per_each_subprocess.append(contours_par_per_process[mv])
|
|
||||||
all_box_coord_per_process.append(crop_coor)
|
|
||||||
|
|
||||||
queue_of_all_params.put([textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours, slopes_per_each_subprocess])
|
|
||||||
def do_work_of_slopes_new_light(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew):
|
|
||||||
self.logger.debug('enter do_work_of_slopes_new_light')
|
|
||||||
slopes_per_each_subprocess = []
|
|
||||||
bounding_box_of_textregion_per_each_subprocess = []
|
|
||||||
textlines_rectangles_per_each_subprocess = []
|
|
||||||
contours_textregion_per_each_subprocess = []
|
|
||||||
contours_textregion_par_per_each_subprocess = []
|
|
||||||
all_box_coord_per_process = []
|
|
||||||
index_by_text_region_contours = []
|
|
||||||
for mv in range(len(boxes_text)):
|
|
||||||
_, crop_coor = crop_image_inside_box(boxes_text[mv],image_page_rotated)
|
|
||||||
mask_textline = np.zeros((textline_mask_tot_ea.shape))
|
|
||||||
mask_textline = cv2.fillPoly(mask_textline,pts=[contours_per_process[mv]],color=(1,1,1))
|
|
||||||
all_text_region_raw = (textline_mask_tot_ea*mask_textline[:,:])[boxes_text[mv][1]:boxes_text[mv][1]+boxes_text[mv][3] , boxes_text[mv][0]:boxes_text[mv][0]+boxes_text[mv][2] ]
|
|
||||||
all_text_region_raw=all_text_region_raw.astype(np.uint8)
|
|
||||||
|
|
||||||
slopes_per_each_subprocess.append([slope_deskew][0])
|
|
||||||
mask_only_con_region = np.zeros(textline_mask_tot_ea.shape)
|
|
||||||
mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], color=(1, 1, 1))
|
|
||||||
|
|
||||||
|
|
||||||
if self.textline_light:
|
|
||||||
all_text_region_raw = np.copy(textline_mask_tot_ea)
|
|
||||||
all_text_region_raw[mask_only_con_region == 0] = 0
|
|
||||||
cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(all_text_region_raw)
|
|
||||||
cnt_clean_rot = filter_contours_area_of_image(all_text_region_raw, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001)
|
|
||||||
else:
|
|
||||||
all_text_region_raw = np.copy(textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]])
|
|
||||||
mask_only_con_region = mask_only_con_region[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]
|
|
||||||
all_text_region_raw[mask_only_con_region == 0] = 0
|
|
||||||
cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, [slope_deskew][0], contours_par_per_process[mv], boxes_text[mv])
|
|
||||||
|
|
||||||
textlines_rectangles_per_each_subprocess.append(cnt_clean_rot)
|
|
||||||
index_by_text_region_contours.append(indexes_r_con_per_pro[mv])
|
|
||||||
bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv])
|
|
||||||
|
|
||||||
contours_textregion_per_each_subprocess.append(contours_per_process[mv])
|
|
||||||
contours_textregion_par_per_each_subprocess.append(contours_par_per_process[mv])
|
|
||||||
all_box_coord_per_process.append(crop_coor)
|
|
||||||
queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours])
|
|
||||||
|
|
||||||
def do_work_of_slopes_new(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew):
|
|
||||||
self.logger.debug('enter do_work_of_slopes_new')
|
|
||||||
slopes_per_each_subprocess = []
|
|
||||||
bounding_box_of_textregion_per_each_subprocess = []
|
|
||||||
textlines_rectangles_per_each_subprocess = []
|
|
||||||
contours_textregion_per_each_subprocess = []
|
|
||||||
contours_textregion_par_per_each_subprocess = []
|
|
||||||
all_box_coord_per_process = []
|
|
||||||
index_by_text_region_contours = []
|
|
||||||
for mv in range(len(boxes_text)):
|
|
||||||
_, crop_coor = crop_image_inside_box(boxes_text[mv],image_page_rotated)
|
|
||||||
mask_textline = np.zeros((textline_mask_tot_ea.shape))
|
|
||||||
mask_textline = cv2.fillPoly(mask_textline,pts=[contours_per_process[mv]],color=(1,1,1))
|
|
||||||
all_text_region_raw = (textline_mask_tot_ea*mask_textline[:,:])[boxes_text[mv][1]:boxes_text[mv][1]+boxes_text[mv][3] , boxes_text[mv][0]:boxes_text[mv][0]+boxes_text[mv][2] ]
|
|
||||||
all_text_region_raw=all_text_region_raw.astype(np.uint8)
|
|
||||||
img_int_p=all_text_region_raw[:,:]#self.all_text_region_raw[mv]
|
|
||||||
img_int_p=cv2.erode(img_int_p,KERNEL,iterations = 2)
|
|
||||||
|
|
||||||
if img_int_p.shape[0]/img_int_p.shape[1]<0.1:
|
|
||||||
slopes_per_each_subprocess.append(0)
|
|
||||||
slope_for_all = [slope_deskew][0]
|
|
||||||
all_text_region_raw = textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]
|
|
||||||
cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv], 0)
|
|
||||||
textlines_rectangles_per_each_subprocess.append(cnt_clean_rot)
|
|
||||||
index_by_text_region_contours.append(indexes_r_con_per_pro[mv])
|
|
||||||
bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv])
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
textline_con, hierarchy = return_contours_of_image(img_int_p)
|
|
||||||
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.00008)
|
|
||||||
y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
|
|
||||||
if self.isNaN(y_diff_mean):
|
|
||||||
slope_for_all = MAX_SLOPE
|
|
||||||
else:
|
|
||||||
sigma_des = int(y_diff_mean * (4.0 / 40.0))
|
|
||||||
if sigma_des < 1:
|
|
||||||
sigma_des = 1
|
|
||||||
img_int_p[img_int_p > 0] = 1
|
|
||||||
slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=self.plotter)
|
|
||||||
if abs(slope_for_all) <= 0.5:
|
|
||||||
slope_for_all = [slope_deskew][0]
|
|
||||||
except Exception as why:
|
|
||||||
self.logger.error(why)
|
|
||||||
slope_for_all = MAX_SLOPE
|
|
||||||
if slope_for_all == MAX_SLOPE:
|
|
||||||
slope_for_all = [slope_deskew][0]
|
|
||||||
slopes_per_each_subprocess.append(slope_for_all)
|
|
||||||
mask_only_con_region = np.zeros(textline_mask_tot_ea.shape)
|
|
||||||
mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], color=(1, 1, 1))
|
|
||||||
|
|
||||||
# plt.imshow(mask_only_con_region)
|
|
||||||
# plt.show()
|
|
||||||
all_text_region_raw = np.copy(textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]])
|
|
||||||
mask_only_con_region = mask_only_con_region[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]
|
|
||||||
|
|
||||||
##plt.imshow(textline_mask_tot_ea)
|
|
||||||
##plt.show()
|
|
||||||
##plt.imshow(all_text_region_raw)
|
|
||||||
##plt.show()
|
|
||||||
##plt.imshow(mask_only_con_region)
|
|
||||||
##plt.show()
|
|
||||||
|
|
||||||
all_text_region_raw[mask_only_con_region == 0] = 0
|
|
||||||
cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv])
|
|
||||||
|
|
||||||
textlines_rectangles_per_each_subprocess.append(cnt_clean_rot)
|
|
||||||
index_by_text_region_contours.append(indexes_r_con_per_pro[mv])
|
|
||||||
bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv])
|
|
||||||
|
|
||||||
contours_textregion_per_each_subprocess.append(contours_per_process[mv])
|
|
||||||
contours_textregion_par_per_each_subprocess.append(contours_par_per_process[mv])
|
|
||||||
all_box_coord_per_process.append(crop_coor)
|
|
||||||
queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours])
|
|
||||||
|
|
||||||
def textline_contours(self, img, use_patches, scaler_h, scaler_w, num_col_classifier=None):
|
def textline_contours(self, img, use_patches, scaler_h, scaler_w, num_col_classifier=None):
|
||||||
self.logger.debug('enter textline_contours')
|
self.logger.debug('enter textline_contours')
|
||||||
|
@ -1923,6 +1623,7 @@ class Eynollah:
|
||||||
prediction_textline_longshot = self.do_prediction(False, img, self.model_textline)
|
prediction_textline_longshot = self.do_prediction(False, img, self.model_textline)
|
||||||
prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w)
|
prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w)
|
||||||
|
|
||||||
|
self.logger.debug('exit textline_contours')
|
||||||
return ((prediction_textline[:, :, 0]==1)*1).astype('uint8'), ((prediction_textline_longshot_true_size[:, :, 0]==1)*1).astype('uint8')
|
return ((prediction_textline[:, :, 0]==1)*1).astype('uint8'), ((prediction_textline_longshot_true_size[:, :, 0]==1)*1).astype('uint8')
|
||||||
|
|
||||||
|
|
||||||
|
@ -1959,6 +1660,7 @@ class Eynollah:
|
||||||
q.put(slopes_sub)
|
q.put(slopes_sub)
|
||||||
poly.put(poly_sub)
|
poly.put(poly_sub)
|
||||||
box_sub.put(boxes_sub_new)
|
box_sub.put(boxes_sub_new)
|
||||||
|
self.logger.debug('exit do_work_of_slopes')
|
||||||
|
|
||||||
def get_regions_light_v_extract_only_images(self,img,is_image_enhanced, num_col_classifier):
|
def get_regions_light_v_extract_only_images(self,img,is_image_enhanced, num_col_classifier):
|
||||||
self.logger.debug("enter get_regions_extract_images_only")
|
self.logger.debug("enter get_regions_extract_images_only")
|
||||||
|
@ -2069,6 +1771,7 @@ class Eynollah:
|
||||||
|
|
||||||
polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) )
|
polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) )
|
||||||
|
|
||||||
|
self.logger.debug("exit get_regions_extract_images_only")
|
||||||
return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page
|
return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page
|
||||||
|
|
||||||
def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False):
|
def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False):
|
||||||
|
@ -2146,6 +1849,7 @@ class Eynollah:
|
||||||
#print("inside 1 ", time.time()-t_in)
|
#print("inside 1 ", time.time()-t_in)
|
||||||
|
|
||||||
###textline_mask_tot_ea = self.run_textline(img_bin)
|
###textline_mask_tot_ea = self.run_textline(img_bin)
|
||||||
|
self.logger.debug("detecting textlines on %s with %d colors", str(img_resized.shape), len(np.unique(img_resized)))
|
||||||
textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier)
|
textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier)
|
||||||
|
|
||||||
|
|
||||||
|
@ -2269,9 +1973,11 @@ class Eynollah:
|
||||||
#plt.imshow(textline_mask_tot_ea)
|
#plt.imshow(textline_mask_tot_ea)
|
||||||
#plt.show()
|
#plt.show()
|
||||||
#print("inside 4 ", time.time()-t_in)
|
#print("inside 4 ", time.time()-t_in)
|
||||||
|
self.logger.debug("exit get_regions_light_v")
|
||||||
return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin
|
return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin
|
||||||
else:
|
else:
|
||||||
img_bin = resize_image(img_bin,img_height_h, img_width_h )
|
img_bin = resize_image(img_bin,img_height_h, img_width_h )
|
||||||
|
self.logger.debug("exit get_regions_light_v")
|
||||||
return None, erosion_hurts, None, textline_mask_tot_ea, img_bin
|
return None, erosion_hurts, None, textline_mask_tot_ea, img_bin
|
||||||
|
|
||||||
def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier):
|
def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier):
|
||||||
|
@ -2392,6 +2098,7 @@ class Eynollah:
|
||||||
|
|
||||||
text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1))
|
text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1))
|
||||||
|
|
||||||
|
self.logger.debug("exit get_regions_from_xy_2models")
|
||||||
return text_regions_p_true, erosion_hurts, polygons_lines_xml
|
return text_regions_p_true, erosion_hurts, polygons_lines_xml
|
||||||
except:
|
except:
|
||||||
|
|
||||||
|
@ -2461,6 +2168,7 @@ class Eynollah:
|
||||||
text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1))
|
text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1))
|
||||||
|
|
||||||
erosion_hurts = True
|
erosion_hurts = True
|
||||||
|
self.logger.debug("exit get_regions_from_xy_2models")
|
||||||
return text_regions_p_true, erosion_hurts, polygons_lines_xml
|
return text_regions_p_true, erosion_hurts, polygons_lines_xml
|
||||||
|
|
||||||
def do_order_of_regions_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot):
|
def do_order_of_regions_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot):
|
||||||
|
@ -2633,6 +2341,7 @@ class Eynollah:
|
||||||
for iii in range(len(order_of_texts_tot)):
|
for iii in range(len(order_of_texts_tot)):
|
||||||
order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0])
|
order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0])
|
||||||
|
|
||||||
|
self.logger.debug("exit do_order_of_regions_full_layout")
|
||||||
return order_text_new, id_of_texts_tot
|
return order_text_new, id_of_texts_tot
|
||||||
|
|
||||||
def do_order_of_regions_no_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot):
|
def do_order_of_regions_no_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot):
|
||||||
|
@ -2743,6 +2452,7 @@ class Eynollah:
|
||||||
for iii in range(len(order_of_texts_tot)):
|
for iii in range(len(order_of_texts_tot)):
|
||||||
order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0])
|
order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0])
|
||||||
|
|
||||||
|
self.logger.debug("exit do_order_of_regions_no_full_layout")
|
||||||
return order_text_new, id_of_texts_tot
|
return order_text_new, id_of_texts_tot
|
||||||
def check_iou_of_bounding_box_and_contour_for_tables(self, layout, table_prediction_early, pixel_tabel, num_col_classifier):
|
def check_iou_of_bounding_box_and_contour_for_tables(self, layout, table_prediction_early, pixel_tabel, num_col_classifier):
|
||||||
layout_org = np.copy(layout)
|
layout_org = np.copy(layout)
|
||||||
|
@ -5051,12 +4761,12 @@ class Eynollah:
|
||||||
if not self.curved_line:
|
if not self.curved_line:
|
||||||
if self.light_version:
|
if self.light_version:
|
||||||
if self.textline_light:
|
if self.textline_light:
|
||||||
#slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \
|
#all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \
|
||||||
# self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew)
|
# self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew)
|
||||||
|
|
||||||
slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \
|
all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \
|
||||||
self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew)
|
self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew)
|
||||||
slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = \
|
all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \
|
||||||
self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew)
|
self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew)
|
||||||
|
|
||||||
#slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \
|
#slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \
|
||||||
|
@ -5074,17 +4784,17 @@ class Eynollah:
|
||||||
|
|
||||||
else:
|
else:
|
||||||
textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
|
textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
|
||||||
slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \
|
all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \
|
||||||
self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
|
self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
|
||||||
slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = \
|
all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \
|
||||||
self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew)
|
self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew)
|
||||||
|
|
||||||
#all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
|
#all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
|
||||||
else:
|
else:
|
||||||
textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
|
textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
|
||||||
slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = \
|
all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = \
|
||||||
self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
|
self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
|
||||||
slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = \
|
all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = \
|
||||||
self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew)
|
self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew)
|
||||||
else:
|
else:
|
||||||
scale_param = 1
|
scale_param = 1
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
|
from functools import partial
|
||||||
|
from multiprocessing import cpu_count, Pool
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from shapely import geometry
|
from shapely import geometry
|
||||||
|
|
||||||
from .rotate import rotate_image, rotation_image_new
|
from .rotate import rotate_image, rotation_image_new
|
||||||
from multiprocessing import Process, Queue, cpu_count
|
|
||||||
from multiprocessing import Pool
|
|
||||||
def contours_in_same_horizon(cy_main_hor):
|
def contours_in_same_horizon(cy_main_hor):
|
||||||
X1 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
|
X1 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
|
||||||
X2 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
|
X2 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
|
||||||
|
@ -29,7 +30,6 @@ def find_contours_mean_y_diff(contours_main):
|
||||||
|
|
||||||
|
|
||||||
def get_text_region_boxes_by_given_contours(contours):
|
def get_text_region_boxes_by_given_contours(contours):
|
||||||
|
|
||||||
kernel = np.ones((5, 5), np.uint8)
|
kernel = np.ones((5, 5), np.uint8)
|
||||||
boxes = []
|
boxes = []
|
||||||
contours_new = []
|
contours_new = []
|
||||||
|
@ -144,73 +144,11 @@ def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
|
||||||
|
|
||||||
return contours_imgs
|
return contours_imgs
|
||||||
|
|
||||||
def do_work_of_contours_in_image(queue_of_all_params, contours_per_process, indexes_r_con_per_pro, img, slope_first):
|
def do_work_of_contours_in_image(contour, index_r_con, img, slope_first):
|
||||||
cnts_org_per_each_subprocess = []
|
|
||||||
index_by_text_region_contours = []
|
|
||||||
for mv in range(len(contours_per_process)):
|
|
||||||
index_by_text_region_contours.append(indexes_r_con_per_pro[mv])
|
|
||||||
|
|
||||||
img_copy = np.zeros(img.shape)
|
|
||||||
img_copy = cv2.fillPoly(img_copy, pts=[contours_per_process[mv]], color=(1, 1, 1))
|
|
||||||
|
|
||||||
img_copy = rotation_image_new(img_copy, -slope_first)
|
|
||||||
|
|
||||||
img_copy = img_copy.astype(np.uint8)
|
|
||||||
imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
|
|
||||||
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
|
||||||
|
|
||||||
cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
|
||||||
|
|
||||||
cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1])
|
|
||||||
cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0])
|
|
||||||
|
|
||||||
|
|
||||||
cnts_org_per_each_subprocess.append(cont_int[0])
|
|
||||||
|
|
||||||
queue_of_all_params.put([ cnts_org_per_each_subprocess, index_by_text_region_contours])
|
|
||||||
|
|
||||||
|
|
||||||
def get_textregion_contours_in_org_image_multi(cnts, img, slope_first):
|
|
||||||
|
|
||||||
num_cores = cpu_count()
|
|
||||||
queue_of_all_params = Queue()
|
|
||||||
|
|
||||||
processes = []
|
|
||||||
nh = np.linspace(0, len(cnts), num_cores + 1)
|
|
||||||
indexes_by_text_con = np.array(range(len(cnts)))
|
|
||||||
for i in range(num_cores):
|
|
||||||
contours_per_process = cnts[int(nh[i]) : int(nh[i + 1])]
|
|
||||||
indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])]
|
|
||||||
|
|
||||||
processes.append(Process(target=do_work_of_contours_in_image, args=(queue_of_all_params, contours_per_process, indexes_text_con_per_process, img,slope_first )))
|
|
||||||
for i in range(num_cores):
|
|
||||||
processes[i].start()
|
|
||||||
cnts_org = []
|
|
||||||
all_index_text_con = []
|
|
||||||
for i in range(num_cores):
|
|
||||||
list_all_par = queue_of_all_params.get(True)
|
|
||||||
contours_for_sub_process = list_all_par[0]
|
|
||||||
indexes_for_sub_process = list_all_par[1]
|
|
||||||
for j in range(len(contours_for_sub_process)):
|
|
||||||
cnts_org.append(contours_for_sub_process[j])
|
|
||||||
all_index_text_con.append(indexes_for_sub_process[j])
|
|
||||||
for i in range(num_cores):
|
|
||||||
processes[i].join()
|
|
||||||
|
|
||||||
print(all_index_text_con)
|
|
||||||
return cnts_org
|
|
||||||
def loop_contour_image(index_l, cnts,img, slope_first):
|
|
||||||
img_copy = np.zeros(img.shape)
|
img_copy = np.zeros(img.shape)
|
||||||
img_copy = cv2.fillPoly(img_copy, pts=[cnts[index_l]], color=(1, 1, 1))
|
img_copy = cv2.fillPoly(img_copy, pts=[contour], color=(1, 1, 1))
|
||||||
|
|
||||||
# plt.imshow(img_copy)
|
|
||||||
# plt.show()
|
|
||||||
|
|
||||||
# print(img.shape,'img')
|
|
||||||
img_copy = rotation_image_new(img_copy, -slope_first)
|
img_copy = rotation_image_new(img_copy, -slope_first)
|
||||||
##print(img_copy.shape,'img_copy')
|
|
||||||
# plt.imshow(img_copy)
|
|
||||||
# plt.show()
|
|
||||||
|
|
||||||
img_copy = img_copy.astype(np.uint8)
|
img_copy = img_copy.astype(np.uint8)
|
||||||
imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
|
imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
|
||||||
|
@ -220,17 +158,22 @@ def loop_contour_image(index_l, cnts,img, slope_first):
|
||||||
|
|
||||||
cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1])
|
cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1])
|
||||||
cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0])
|
cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0])
|
||||||
# print(np.shape(cont_int[0]))
|
|
||||||
return cont_int[0]
|
|
||||||
|
|
||||||
def get_textregion_contours_in_org_image_multi2(cnts, img, slope_first):
|
|
||||||
|
|
||||||
cnts_org = []
|
return cont_int[0], index_r_con
|
||||||
# print(cnts,'cnts')
|
|
||||||
with Pool(cpu_count()) as p:
|
def get_textregion_contours_in_org_image_multi(cnts, img, slope_first):
    """Run ``do_work_of_contours_in_image`` over all contours in a process pool.

    Every contour in ``cnts`` is paired with its position in the sequence
    and submitted as one pool task; ``img`` and ``slope_first`` are bound
    as keyword arguments for every task.

    Returns the worker results transposed into parallel tuples (one tuple
    per field of the worker's return value).  For empty input a pair of
    empty lists is returned and no pool is started.
    """
    if len(cnts) == 0:
        return [], []

    worker = partial(do_work_of_contours_in_image, img=img, slope_first=slope_first)
    # starmap unpacks each (contour, position) pair into the worker's first
    # two positional parameters.
    jobs = [(cnt, position) for position, cnt in enumerate(cnts)]
    with Pool(processes=cpu_count()) as pool:
        results = pool.starmap(worker, jobs)
    return tuple(zip(*results))
|
||||||
|
|
||||||
def get_textregion_contours_in_org_image(cnts, img, slope_first):
|
def get_textregion_contours_in_org_image(cnts, img, slope_first):
|
||||||
|
|
||||||
|
@ -292,69 +235,40 @@ def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first):
|
||||||
|
|
||||||
return cnts_org
|
return cnts_org
|
||||||
|
|
||||||
def return_list_of_contours_with_desired_order(ls_cons, sorted_indexes):
|
def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first):
    """Rotate one filled contour by ``-slope_first`` and re-extract its outline.

    The contour is painted onto a blank canvas of ``img``'s shape, the
    canvas is rotated with ``rotation_image_new``, and the first contour
    that OpenCV finds in the thresholded result is taken.  Its x and y
    coordinates are then shifted by the absolute difference between the
    rotated canvas size and the size of ``img``.

    Returns ``(contour, index_r_con)``; the index is passed through
    unchanged so that the caller can match results to inputs.
    """
    canvas = cv2.fillPoly(np.zeros(img.shape), pts=[contour_par], color=(1, 1, 1))
    rotated = rotation_image_new(canvas, -slope_first).astype(np.uint8)

    # NOTE(review): BGR2GRAY presumes a three-channel canvas, i.e. that
    # ``img`` itself has three channels -- confirm against callers.
    gray = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 0, 255, 0)
    found, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    outline = found[0]
    shift_x = np.abs(rotated.shape[1] - img.shape[1])
    shift_y = np.abs(rotated.shape[0] - img.shape[0])
    outline[:, 0, 0] = outline[:, 0, 0] + shift_x
    outline[:, 0, 1] = outline[:, 0, 1] + shift_y
    return outline, index_r_con
|
||||||
index_by_text_region_contours.append(indexes_r_con_per_pro[mv])
|
|
||||||
|
|
||||||
queue_of_all_params.put([contours_textregion_per_each_subprocess, index_by_text_region_contours])
|
|
||||||
|
|
||||||
def get_textregion_contours_in_org_image_light(cnts, img, slope_first):
|
def get_textregion_contours_in_org_image_light(cnts, img, slope_first):
    """Back-rotate text-region contours on a 6x downscaled copy of the page.

    ``img`` is shrunk by a factor of six (nearest-neighbour) and every
    contour is divided by the same factor before being handed, together
    with its position in ``cnts``, to ``do_back_rotation_and_get_cnt_back``
    in a process pool.  The returned contours are multiplied by six again,
    so the result is expressed in the coordinates of the full-size image.

    Returns a list of contours in the same order as ``cnts`` (``starmap``
    preserves input order); an empty list for empty input.
    """
    if not len(cnts):
        return []

    scale = 6
    img = cv2.resize(img,
                     (int(img.shape[1] / scale), int(img.shape[0] / scale)),
                     interpolation=cv2.INTER_NEAREST)
    # np.int was removed in NumPy 1.24; use the explicit 32-bit type, which
    # is also what the code used before this refactoring.
    cnts = [(cnt / scale).astype(np.int32) for cnt in cnts]

    worker = partial(do_back_rotation_and_get_cnt_back,
                     img=img,
                     slope_first=slope_first)
    with Pool(processes=cpu_count()) as pool:
        results = pool.starmap(worker, zip(cnts, range(len(cnts))))

    # Each result is (contour, index); the index is not needed here because
    # the results already arrive in input order.
    contours, _ = zip(*results)
    return [cnt * scale for cnt in contours]
|
||||||
all_index_text_con = []
|
|
||||||
for i in range(num_cores):
|
|
||||||
list_all_par = queue_of_all_params.get(True)
|
|
||||||
contours_for_subprocess = list_all_par[0]
|
|
||||||
indexes_for_subprocess = list_all_par[1]
|
|
||||||
for j in range(len(contours_for_subprocess)):
|
|
||||||
cnts_org.append(contours_for_subprocess[j])
|
|
||||||
all_index_text_con.append(indexes_for_subprocess[j])
|
|
||||||
for i in range(num_cores):
|
|
||||||
processes[i].join()
|
|
||||||
|
|
||||||
cnts_org = return_list_of_contours_with_desired_order(cnts_org, all_index_text_con)
|
|
||||||
|
|
||||||
return cnts_org
|
|
||||||
|
|
||||||
def return_contours_of_interested_textline(region_pre_p, pixel):
|
def return_contours_of_interested_textline(region_pre_p, pixel):
|
||||||
|
|
||||||
|
|
|
@ -1,22 +1,23 @@
|
||||||
|
import os
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
from multiprocessing import Pool, cpu_count
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import cv2
|
import cv2
|
||||||
from scipy.signal import find_peaks
|
from scipy.signal import find_peaks
|
||||||
from scipy.ndimage import gaussian_filter1d
|
from scipy.ndimage import gaussian_filter1d
|
||||||
import os
|
|
||||||
from multiprocessing import Process, Queue, cpu_count
|
|
||||||
from multiprocessing import Pool
|
|
||||||
from .rotate import rotate_image
|
from .rotate import rotate_image
|
||||||
|
from .resize import resize_image
|
||||||
from .contour import (
|
from .contour import (
|
||||||
return_parent_contours,
|
return_parent_contours,
|
||||||
filter_contours_area_of_image_tables,
|
filter_contours_area_of_image_tables,
|
||||||
return_contours_of_image,
|
return_contours_of_image,
|
||||||
filter_contours_area_of_image
|
filter_contours_area_of_image,
|
||||||
|
return_contours_of_interested_textline,
|
||||||
|
find_contours_mean_y_diff,
|
||||||
)
|
)
|
||||||
from .is_nan import isNaN
|
|
||||||
from . import (
|
from . import (
|
||||||
find_num_col_deskew,
|
find_num_col_deskew,
|
||||||
isNaN,
|
crop_image_inside_box,
|
||||||
)
|
)
|
||||||
|
|
||||||
def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
|
def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
|
||||||
|
@ -1249,13 +1250,13 @@ def separate_lines_new_inside_tiles(img_path, thetha):
|
||||||
forest.append(peaks_neg[i + 1])
|
forest.append(peaks_neg[i + 1])
|
||||||
if diff_peaks[i] > cut_off:
|
if diff_peaks[i] > cut_off:
|
||||||
# print(forest[np.argmin(z[forest]) ] )
|
# print(forest[np.argmin(z[forest]) ] )
|
||||||
if not isNaN(forest[np.argmin(z[forest])]):
|
if not np.isnan(forest[np.argmin(z[forest])]):
|
||||||
peaks_neg_true.append(forest[np.argmin(z[forest])])
|
peaks_neg_true.append(forest[np.argmin(z[forest])])
|
||||||
forest = []
|
forest = []
|
||||||
forest.append(peaks_neg[i + 1])
|
forest.append(peaks_neg[i + 1])
|
||||||
if i == (len(peaks_neg) - 1):
|
if i == (len(peaks_neg) - 1):
|
||||||
# print(print(forest[np.argmin(z[forest]) ] ))
|
# print(print(forest[np.argmin(z[forest]) ] ))
|
||||||
if not isNaN(forest[np.argmin(z[forest])]):
|
if not np.isnan(forest[np.argmin(z[forest])]):
|
||||||
peaks_neg_true.append(forest[np.argmin(z[forest])])
|
peaks_neg_true.append(forest[np.argmin(z[forest])])
|
||||||
|
|
||||||
diff_peaks_pos = np.abs(np.diff(peaks))
|
diff_peaks_pos = np.abs(np.diff(peaks))
|
||||||
|
@ -1272,13 +1273,13 @@ def separate_lines_new_inside_tiles(img_path, thetha):
|
||||||
forest.append(peaks[i + 1])
|
forest.append(peaks[i + 1])
|
||||||
if diff_peaks_pos[i] > cut_off:
|
if diff_peaks_pos[i] > cut_off:
|
||||||
# print(forest[np.argmin(z[forest]) ] )
|
# print(forest[np.argmin(z[forest]) ] )
|
||||||
if not isNaN(forest[np.argmax(z[forest])]):
|
if not np.isnan(forest[np.argmax(z[forest])]):
|
||||||
peaks_pos_true.append(forest[np.argmax(z[forest])])
|
peaks_pos_true.append(forest[np.argmax(z[forest])])
|
||||||
forest = []
|
forest = []
|
||||||
forest.append(peaks[i + 1])
|
forest.append(peaks[i + 1])
|
||||||
if i == (len(peaks) - 1):
|
if i == (len(peaks) - 1):
|
||||||
# print(print(forest[np.argmin(z[forest]) ] ))
|
# print(print(forest[np.argmin(z[forest]) ] ))
|
||||||
if not isNaN(forest[np.argmax(z[forest])]):
|
if not np.isnan(forest[np.argmax(z[forest])]):
|
||||||
peaks_pos_true.append(forest[np.argmax(z[forest])])
|
peaks_pos_true.append(forest[np.argmax(z[forest])])
|
||||||
|
|
||||||
# print(len(peaks_neg_true) ,len(peaks_pos_true) ,'lensss')
|
# print(len(peaks_neg_true) ,len(peaks_pos_true) ,'lensss')
|
||||||
|
@ -1658,3 +1659,189 @@ def get_smallest_skew(img, sigma_des, angles, num_cores=1, plotter=None):
|
||||||
except:
|
except:
|
||||||
angle = 0
|
angle = 0
|
||||||
return angle
|
return angle
|
||||||
|
|
||||||
|
def do_work_of_slopes_new(
        box_text, contour, contour_par, index_r_con,
        textline_mask_tot_ea, image_page_rotated, slope_deskew,
        logger, MAX_SLOPE=999, KERNEL=None, plotter=None
):
    """Estimate the slope of one text region and extract its textline contours.

    The textline mask is restricted to ``contour``, cropped to ``box_text``
    (``x, y, w, h``) and eroded twice with ``KERNEL`` (a 5x5 block of ones
    when not given).  If the crop's height/width ratio is below 0.1 the
    slope is reported as 0 and ``slope_deskew`` is used for post-processing.
    Otherwise a slope is estimated with ``return_deskew_slop``; the page
    slope ``slope_deskew`` is used instead when the estimate is within
    +/-0.5, when no mean line distance could be computed, or when the
    estimation raised (the error is logged).

    Returns the tuple ``(cnt_clean_rot, box_text, contour, contour_par,
    crop_coor, index_r_con, slope)``.
    """
    logger.debug('enter do_work_of_slopes_new')
    if KERNEL is None:
        KERNEL = np.ones((5, 5), np.uint8)

    x, y, w, h = box_text
    rows, cols = slice(y, y + h), slice(x, x + w)
    _, crop_coor = crop_image_inside_box(box_text, image_page_rotated)

    region_fill = cv2.fillPoly(np.zeros(textline_mask_tot_ea.shape), pts=[contour], color=(1, 1, 1))
    region_lines = (textline_mask_tot_ea * region_fill)[rows, cols].astype(np.uint8)
    img_int_p = cv2.erode(region_lines, KERNEL, iterations=2)

    if img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
        # Very flat box: no per-region estimate, report slope 0.
        slope = 0
        cnt_clean_rot = textline_contours_postprocessing(
            textline_mask_tot_ea[rows, cols], slope_deskew, contour_par, box_text, 0)
        return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope

    try:
        textline_con, hierarchy = return_contours_of_image(img_int_p)
        textline_con_fil = filter_contours_area_of_image(
            img_int_p, textline_con, hierarchy, max_area=1, min_area=0.00008)
        y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
        if np.isnan(y_diff_mean):
            slope = MAX_SLOPE
        else:
            sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0)))
            img_int_p[img_int_p > 0] = 1
            slope = return_deskew_slop(img_int_p, sigma_des, plotter=plotter)
            if abs(slope) <= 0.5:
                slope = slope_deskew
    except Exception as why:
        logger.error(why)
        slope = MAX_SLOPE

    # MAX_SLOPE is the "no usable estimate" marker; fall back to the page slope.
    if slope == MAX_SLOPE:
        slope = slope_deskew

    parent_fill = cv2.fillPoly(np.zeros(textline_mask_tot_ea.shape), pts=[contour_par], color=(1, 1, 1))
    all_text_region_raw = textline_mask_tot_ea[rows, cols].copy()
    all_text_region_raw[parent_fill[rows, cols] == 0] = 0
    cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope, contour_par, box_text)

    return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope
|
||||||
|
|
||||||
|
|
||||||
|
def do_work_of_slopes_new_curved(
        box_text, contour, contour_par, index_r_con,
        textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew,
        logger, MAX_SLOPE=999, KERNEL=None, plotter=None
):
    """Estimate a region's slope and extract its textline contours (curved mode).

    The slope is determined as in ``do_work_of_slopes_new`` but on the
    un-eroded crop of the textline mask, with a larger minimum contour
    area (0.0008) and a strict ``< 0.5`` test for falling back to
    ``slope_deskew``.  For slopes with absolute value below 45 the lines
    are split with ``separate_lines_new2``, every resulting line is
    dilated (5 iterations when ``num_col + 1 == 1``, otherwise 4), scaled
    by ``scale_par`` and re-contoured.  For all other slopes
    ``textline_contours_postprocessing`` is used instead.

    Returns ``(textlines reversed, box_text, contour, contour_par,
    crop_coor, index_r_con, slope)``.
    """
    logger.debug("enter do_work_of_slopes_new_curved")
    if KERNEL is None:
        KERNEL = np.ones((5, 5), np.uint8)

    x, y, w, h = box_text
    all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w].astype(np.uint8)
    # This is a view, not a copy: binarising img_int_p below also changes
    # all_text_region_raw, which the large-slope branch consumes.
    img_int_p = all_text_region_raw[:, :]

    if img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
        slope = 0
        slope_for_all = slope_deskew
    else:
        try:
            textline_con, hierarchy = return_contours_of_image(img_int_p)
            textline_con_fil = filter_contours_area_of_image(
                img_int_p, textline_con, hierarchy, max_area=1, min_area=0.0008)
            y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
            if np.isnan(y_diff_mean):
                slope_for_all = MAX_SLOPE
            else:
                sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0)))
                img_int_p[img_int_p > 0] = 1
                slope_for_all = return_deskew_slop(img_int_p, sigma_des, plotter=plotter)
                if abs(slope_for_all) < 0.5:
                    slope_for_all = slope_deskew
        except Exception as why:
            logger.error(why)
            slope_for_all = MAX_SLOPE

        # MAX_SLOPE marks "no usable estimate"; fall back to the page slope.
        if slope_for_all == MAX_SLOPE:
            slope_for_all = slope_deskew
        slope = slope_for_all

    _, crop_coor = crop_image_inside_box(box_text, image_page_rotated)

    if abs(slope_for_all) < 45:
        # From here on the bounding box of the parent contour is used
        # instead of box_text.
        x, y, w, h = cv2.boundingRect(contour_par)
        parent_mask = cv2.fillPoly(np.zeros(mask_texts_only.shape), pts=[contour_par], color=(1, 1, 1))
        parent_mask_patch = parent_mask[y: y + h, x: x + w]
        parent_lines = parent_mask * textline_mask_tot_ea

        separated = separate_lines_new2(parent_lines[y: y + h, x: x + w], 0, num_col, slope_for_all,
                                        plotter=plotter)
        # Discard everything outside the parent contour.
        separated[parent_mask_patch[:, :] != 1] = 0

        textline_region_in_image = np.zeros(textline_mask_tot_ea.shape)
        textline_region_in_image[y: y + h, x: x + w] = separated

        pixel_img = 1
        cnt_textlines_in_image = return_contours_of_interested_textline(textline_region_in_image, pixel_img)

        dilate_iterations = 5 if num_col + 1 == 1 else 4
        textlines_cnt_per_region = []
        for line_cnt in cnt_textlines_in_image:
            line_mask = cv2.fillPoly(np.zeros(mask_texts_only.shape), pts=[line_cnt], color=(1, 1, 1))
            line_mask = cv2.dilate(line_mask, KERNEL, iterations=dilate_iterations)
            line_mask = resize_image(line_mask,
                                     int(line_mask.shape[0] * scale_par),
                                     int(line_mask.shape[1] * scale_par))
            line_cnts = return_contours_of_interested_textline(line_mask, pixel_img)
            try:
                textlines_cnt_per_region.append(line_cnts[0])
            except Exception as why:
                # Lines that yield no contour are logged and left out.
                logger.error(why)
    else:
        textlines_cnt_per_region = textline_contours_postprocessing(
            all_text_region_raw, slope_for_all, contour_par, box_text, True)

    return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope
|
||||||
|
|
||||||
|
def do_work_of_slopes_new_light(
        box_text, contour, contour_par, index_r_con,
        textline_mask_tot_ea, image_page_rotated, slope_deskew,
        logger, textline_light=False
):
    """Extract the textline contours of one text region without estimating a slope.

    This is a module-level function (usable as a ``multiprocessing`` pool
    task), so it cannot read attributes of the ``Eynollah`` instance; the
    mode switch is therefore passed in explicitly.

    Args:
        box_text: bounding box ``(x, y, w, h)`` of the region.
        contour: region contour; only passed through to the result.
        contour_par: parent contour used to mask the textline image.
        index_r_con: index of the region; passed through to the result.
        textline_mask_tot_ea: page-sized textline mask.
        image_page_rotated: page image, used only to derive ``crop_coor``.
        slope_deskew: page slope; used for post-processing and reported
            as the region's slope.
        logger: logger used for the debug message.
        textline_light: when true, contours are taken from the full-page
            mask restricted to ``contour_par`` and filtered by area
            (minimum 0.00001); otherwise the mask is cropped to
            ``box_text`` and passed to ``textline_contours_postprocessing``.
            Callers should pass their own ``textline_light`` setting here
            (e.g. via ``functools.partial``).

    Returns:
        ``(cnt_clean_rot, box_text, contour, contour_par, crop_coor,
        index_r_con, slope_deskew)``.
    """
    logger.debug('enter do_work_of_slopes_new_light')

    x, y, w, h = box_text
    _, crop_coor = crop_image_inside_box(box_text, image_page_rotated)

    # The previous version also built a mask from ``contour`` and multiplied
    # it with the full-page textline mask, but that result was overwritten
    # in both branches below before being used, so it is not computed here.
    mask_only_con_region = np.zeros(textline_mask_tot_ea.shape)
    mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contour_par], color=(1, 1, 1))

    if textline_light:
        all_text_region_raw = np.copy(textline_mask_tot_ea)
        all_text_region_raw[mask_only_con_region == 0] = 0
        cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(all_text_region_raw)
        cnt_clean_rot = filter_contours_area_of_image(all_text_region_raw, cnt_clean_rot_raw, hir_on_cnt_clean_rot,
                                                      max_area=1, min_area=0.00001)
    else:
        all_text_region_raw = np.copy(textline_mask_tot_ea[y: y + h, x: x + w])
        mask_only_con_region = mask_only_con_region[y: y + h, x: x + w]
        all_text_region_raw[mask_only_con_region == 0] = 0
        cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_deskew, contour_par, box_text)

    # No per-region slope is estimated in light mode: report the page slope
    # (the previous ``return ..., slope`` referenced an undefined name).
    return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope_deskew
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue