mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-04-30 19:22:03 +02:00
more run_single refactoring…
- `run_single`: re-use `return_contours_of_interested_region` for extraction and filtering of text region contours
- `run_single`: isolate new function `match_deskewed_contours`
- `run_single`: apply dilation afterwards
- rename `contours_only_text_parent_d_ordered` → `polygons_of_textregions_d`
- rename `contours_only_text_parent` → `polygons_of_textregions`
- rename `contours_only_text_parent_h` → `polygons_of_textregions_h`
- `do_work_of_slopes_new_curved` and `get_slopes_and_deskew_new_curved`: no need for `mask_texts_only` array arg
- `filter_contours_inside_a_bigger_one`: no need for `image` as array arg, simplify
- `split_textregion_main_vs_head`: simplify, re-order arguments and return tuple logically
- if no main text regions are found, just convert marginals to main text and continue normally instead of stopping early w/ empty marginals (i.e. no textlines)
This commit is contained in:
parent
a2f43b8d69
commit
7b5aa2a1f6
4 changed files with 293 additions and 378 deletions
|
|
@ -33,7 +33,6 @@ import gc
|
|||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import shapely.affinity
|
||||
from scipy.signal import find_peaks
|
||||
from scipy.ndimage import gaussian_filter1d
|
||||
|
||||
|
|
@ -56,6 +55,7 @@ from .utils.contour import (
|
|||
return_parent_contours,
|
||||
dilate_textregion_contours,
|
||||
dilate_textline_contours,
|
||||
match_deskewed_contours,
|
||||
polygon2contour,
|
||||
contour2polygon,
|
||||
join_polygons,
|
||||
|
|
@ -1034,13 +1034,12 @@ class Eynollah:
|
|||
slopes)
|
||||
|
||||
def get_slopes_and_deskew_new_curved(self, contours_par, textline_mask_tot, boxes,
|
||||
mask_texts_only, num_col, scale_par, slope_deskew, name):
|
||||
num_col, scale_par, slope_deskew, name):
|
||||
if not len(contours_par):
|
||||
return [], [], []
|
||||
self.logger.debug("enter get_slopes_and_deskew_new_curved")
|
||||
results = map(partial(do_work_of_slopes_new_curved,
|
||||
textline_mask_tot_ea=textline_mask_tot,
|
||||
mask_texts_only=mask_texts_only,
|
||||
num_col=num_col,
|
||||
scale_par=scale_par,
|
||||
slope_deskew=slope_deskew,
|
||||
|
|
@ -2025,83 +2024,72 @@ class Eynollah:
|
|||
else:
|
||||
return ordered
|
||||
|
||||
def filter_contours_inside_a_bigger_one(self, contours, contours_d_ordered, image,
|
||||
def filter_contours_inside_a_bigger_one(self, contours, contours_d, shape,
|
||||
marginal_cnts=None, type_contour="textregion"):
|
||||
if type_contour == "textregion":
|
||||
areas = np.array(list(map(cv2.contourArea, contours)))
|
||||
area_tot = image.shape[0]*image.shape[1]
|
||||
areas_ratio = areas / area_tot
|
||||
areas = areas / float(np.prod(shape[:2]))
|
||||
cx_main, cy_main = find_center_of_contours(contours)
|
||||
|
||||
contours_index_small = np.flatnonzero(areas_ratio < 1e-3)
|
||||
contours_index_large = np.flatnonzero(areas_ratio >= 1e-3)
|
||||
contours = ensure_array(contours)
|
||||
indices_small = np.flatnonzero(areas < 1e-3)
|
||||
indices_large = np.flatnonzero(areas >= 1e-3)
|
||||
|
||||
#contours_> = [contours[ind] for ind in contours_index_large]
|
||||
indexes_to_be_removed = []
|
||||
for ind_small in contours_index_small:
|
||||
results = [cv2.pointPolygonTest(contours[ind_large], (cx_main[ind_small],
|
||||
cy_main[ind_small]),
|
||||
False)
|
||||
for ind_large in contours_index_large]
|
||||
results = np.array(results)
|
||||
if np.any(results==1):
|
||||
indexes_to_be_removed.append(ind_small)
|
||||
elif marginal_cnts:
|
||||
results_marginal = [cv2.pointPolygonTest(marginal_cnt,
|
||||
indices_drop = []
|
||||
for ind_small in indices_small:
|
||||
results = [cv2.pointPolygonTest(contours[ind_large],
|
||||
(cx_main[ind_small],
|
||||
cy_main[ind_small]),
|
||||
False)
|
||||
for marginal_cnt in marginal_cnts]
|
||||
results_marginal = np.array(results_marginal)
|
||||
if np.any(results_marginal==1):
|
||||
indexes_to_be_removed.append(ind_small)
|
||||
for ind_large in indices_large]
|
||||
results = np.array(results)
|
||||
if np.any(results == 1):
|
||||
indices_drop.append(ind_small)
|
||||
elif marginal_cnts:
|
||||
results = [cv2.pointPolygonTest(contour,
|
||||
(cx_main[ind_small],
|
||||
cy_main[ind_small]),
|
||||
False)
|
||||
for contour in marginal_cnts]
|
||||
results = np.array(results)
|
||||
if np.any(results == 1):
|
||||
indices_drop.append(ind_small)
|
||||
|
||||
contours = np.delete(contours, indexes_to_be_removed, axis=0)
|
||||
if len(contours_d_ordered):
|
||||
contours_d_ordered = np.delete(contours_d_ordered, indexes_to_be_removed, axis=0)
|
||||
contours = np.delete(contours, indices_drop, axis=0)
|
||||
if len(contours_d):
|
||||
contours_d = ensure_array(contours_d)
|
||||
contours_d = np.delete(contours_d, indices_drop, axis=0)
|
||||
|
||||
return contours, contours_d_ordered
|
||||
return contours, contours_d
|
||||
|
||||
else:
|
||||
contours_txtline_of_all_textregions = []
|
||||
indexes_of_textline_tot = []
|
||||
index_textline_inside_textregion = []
|
||||
contours_of_contours = []
|
||||
indexes_parent = []
|
||||
indexes_child = []
|
||||
for ind_region, textlines in enumerate(contours):
|
||||
contours_txtline_of_all_textregions.extend(textlines)
|
||||
index_textline_inside_textregion.extend(list(range(len(textlines))))
|
||||
indexes_of_textline_tot.extend([ind_region] * len(textlines))
|
||||
contours_of_contours.extend(textlines)
|
||||
indexes_parent.extend([ind_region] * len(textlines))
|
||||
indexes_child.extend(list(range(len(textlines))))
|
||||
|
||||
areas_tot = np.array(list(map(cv2.contourArea, contours_txtline_of_all_textregions)))
|
||||
area_tot_tot = image.shape[0]*image.shape[1]
|
||||
cx_main_tot, cy_main_tot = find_center_of_contours(contours_txtline_of_all_textregions)
|
||||
areas = np.array(list(map(cv2.contourArea, contours_of_contours)))
|
||||
cx, cy = find_center_of_contours(contours_of_contours)
|
||||
|
||||
textline_in_textregion_index_to_del = {}
|
||||
for ij in range(len(contours_txtline_of_all_textregions)):
|
||||
area_of_con_interest = areas_tot[ij]
|
||||
args_without = np.delete(np.arange(len(contours_txtline_of_all_textregions)), ij)
|
||||
areas_without = areas_tot[args_without]
|
||||
args_with_bigger_area = args_without[areas_without > 1.5*area_of_con_interest]
|
||||
for i in range(len(contours_of_contours)):
|
||||
args_other = np.setdiff1d(np.arange(len(contours_of_contours)), i)
|
||||
areas_other = areas[args_other]
|
||||
args_other_larger = args_other[areas_other > 1.5 * areas[i]]
|
||||
|
||||
if len(args_with_bigger_area)>0:
|
||||
results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind],
|
||||
(cx_main_tot[ij],
|
||||
cy_main_tot[ij]),
|
||||
False)
|
||||
for ind in args_with_bigger_area ]
|
||||
results = np.array(results)
|
||||
if np.any(results==1):
|
||||
#print(indexes_of_textline_tot[ij], index_textline_inside_textregion[ij])
|
||||
for ind in args_other_larger:
|
||||
if cv2.pointPolygonTest(contours_of_contours[ind],
|
||||
(cx[i], cy[i]), False) == 1:
|
||||
textline_in_textregion_index_to_del.setdefault(
|
||||
indexes_of_textline_tot[ij], list()).append(
|
||||
index_textline_inside_textregion[ij])
|
||||
#contours[indexes_of_textline_tot[ij]].pop(index_textline_inside_textregion[ij])
|
||||
indexes_parent[i], list()).append(
|
||||
indexes_child[i])
|
||||
|
||||
for textregion_index_to_del in textline_in_textregion_index_to_del:
|
||||
contours[textregion_index_to_del] = list(np.delete(
|
||||
contours[textregion_index_to_del],
|
||||
textline_in_textregion_index_to_del[textregion_index_to_del],
|
||||
# needed so numpy does not flatten the entire result when 0 left
|
||||
axis=0))
|
||||
for where, which in textline_in_textregion_index_to_del.items():
|
||||
contours[where] = [line for idx, line in enumerate(contours[where])
|
||||
if idx not in which]
|
||||
|
||||
return contours
|
||||
|
||||
|
|
@ -2122,7 +2110,7 @@ class Eynollah:
|
|||
|
||||
def filter_contours_without_textline_inside(
|
||||
self, contours_par, contours_textline,
|
||||
contours_only_text_parent_d_ordered,
|
||||
contours_only_text_parent_d,
|
||||
conf_contours_textregions):
|
||||
|
||||
assert len(contours_par) == len(contours_textline)
|
||||
|
|
@ -2135,7 +2123,7 @@ class Eynollah:
|
|||
|
||||
return (filterfun(contours_par),
|
||||
filterfun(contours_textline),
|
||||
filterfun(contours_only_text_parent_d_ordered),
|
||||
filterfun(contours_only_text_parent_d),
|
||||
filterfun(conf_contours_textregions),
|
||||
# indices
|
||||
)
|
||||
|
|
@ -2144,8 +2132,8 @@ class Eynollah:
|
|||
self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals,
|
||||
slopes_marginals, conf_marginals, mid_point_of_page_width):
|
||||
cx_marg, cy_marg = find_center_of_contours(polygons_of_marginals)
|
||||
cx_marg = np.array(cx_marg)
|
||||
cy_marg = np.array(cy_marg)
|
||||
cx_marg = ensure_array(cx_marg)
|
||||
cy_marg = ensure_array(cy_marg)
|
||||
|
||||
def split(lis):
|
||||
left, right = [], []
|
||||
|
|
@ -2330,7 +2318,7 @@ class Eynollah:
|
|||
all_found_textline_polygons = [ all_found_textline_polygons ]
|
||||
all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons)
|
||||
all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
|
||||
all_found_textline_polygons, None, textline_mask_tot_ea, type_contour="textline")
|
||||
all_found_textline_polygons, None, None, type_contour="textline")
|
||||
|
||||
order_text_new = [0]
|
||||
slopes =[0]
|
||||
|
|
@ -2522,239 +2510,48 @@ class Eynollah:
|
|||
conf_images = get_region_confidences(polygons_of_images, regions_confidence)
|
||||
conf_tables = get_region_confidences(contours_tables, table_confidence)
|
||||
|
||||
text_only = (text_regions_p == label_text) * 1
|
||||
polygons_of_textregions = return_contours_of_interested_region(text_regions_p, label_text,
|
||||
min_area=MIN_AREA_REGION)
|
||||
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
||||
text_only_d = (text_regions_p_d == label_text) * 1
|
||||
|
||||
#print("text region early 2 in %.1fs", time.time() - t0)
|
||||
###min_con_area = 0.000005
|
||||
contours_only_text, hir_on_text = return_contours_of_image(text_only)
|
||||
contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text)
|
||||
contours_only_text_parent_d_ordered = []
|
||||
contours_only_text_parent_d = []
|
||||
|
||||
if len(contours_only_text_parent) > 0:
|
||||
areas_tot_text = np.prod(text_only.shape)
|
||||
areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent])
|
||||
areas_cnt_text = areas_cnt_text / float(areas_tot_text)
|
||||
#self.logger.info('areas_cnt_text %s', areas_cnt_text)
|
||||
contours_only_text_parent = ensure_array(contours_only_text_parent)
|
||||
contours_only_text_parent = contours_only_text_parent[areas_cnt_text > MIN_AREA_REGION]
|
||||
areas_cnt_text_parent = areas_cnt_text[areas_cnt_text > MIN_AREA_REGION]
|
||||
|
||||
index_con_parents = np.argsort(areas_cnt_text_parent)
|
||||
contours_only_text_parent = contours_only_text_parent[index_con_parents]
|
||||
areas_cnt_text_parent = areas_cnt_text_parent[index_con_parents]
|
||||
|
||||
centers = np.stack(find_center_of_contours(contours_only_text_parent)) # [2, N]
|
||||
|
||||
center0 = centers[:, -1:] # [2, 1]
|
||||
|
||||
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
||||
contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d)
|
||||
contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d)
|
||||
|
||||
areas_tot_text_d = np.prod(text_only_d.shape)
|
||||
areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d])
|
||||
areas_cnt_text_d = areas_cnt_text_d / float(areas_tot_text_d)
|
||||
|
||||
contours_only_text_parent_d = ensure_array(contours_only_text_parent_d)
|
||||
contours_only_text_parent_d = contours_only_text_parent_d[areas_cnt_text_d > MIN_AREA_REGION]
|
||||
areas_cnt_text_d = areas_cnt_text_d[areas_cnt_text_d > MIN_AREA_REGION]
|
||||
|
||||
if len(contours_only_text_parent_d):
|
||||
index_con_parents_d = np.argsort(areas_cnt_text_d)
|
||||
contours_only_text_parent_d = contours_only_text_parent_d[index_con_parents_d]
|
||||
areas_cnt_text_d = areas_cnt_text_d[index_con_parents_d]
|
||||
|
||||
centers_d = np.stack(find_center_of_contours(contours_only_text_parent_d)) # [2, N]
|
||||
|
||||
center0_d = centers_d[:, -1:].copy() # [2, 1]
|
||||
|
||||
# find the largest among the largest 5 deskewed contours
|
||||
# that is also closest to the largest original contour
|
||||
last5_centers_d = centers_d[:, -5:]
|
||||
dists_d = np.linalg.norm(center0 - last5_centers_d, axis=0)
|
||||
ind_largest = len(contours_only_text_parent_d) - last5_centers_d.shape[1] + np.argmin(dists_d)
|
||||
center0_d[:, 0] = centers_d[:, ind_largest]
|
||||
|
||||
# order new contours the same way as the undeskewed contours
|
||||
# (by calculating the offset of the largest contours, respectively,
|
||||
# of the new and undeskewed image; then for each contour,
|
||||
# finding the closest new contour, with proximity calculated
|
||||
# as distance of their centers modulo offset vector)
|
||||
(h, w) = text_only.shape[:2]
|
||||
center = (w // 2.0, h // 2.0)
|
||||
M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0)
|
||||
M_22 = np.array(M)[:2, :2]
|
||||
center0 = np.dot(M_22, center0) # [2, 1]
|
||||
offset = center0 - center0_d # [2, 1]
|
||||
|
||||
centers = np.dot(M_22, centers) - offset # [2,N]
|
||||
# add dimension for area (so only contours of similar size will be considered close)
|
||||
centers = np.append(centers, areas_cnt_text_parent[np.newaxis], axis=0)
|
||||
centers_d = np.append(centers_d, areas_cnt_text_d[np.newaxis], axis=0)
|
||||
|
||||
dists = np.zeros((len(contours_only_text_parent), len(contours_only_text_parent_d)))
|
||||
for i in range(len(contours_only_text_parent)):
|
||||
dists[i] = np.linalg.norm(centers[:, i:i + 1] - centers_d, axis=0)
|
||||
corresp = np.zeros(dists.shape, dtype=bool)
|
||||
# keep searching next-closest until at least one correspondence on each side
|
||||
while not np.all(corresp.sum(axis=1)) or not np.all(corresp.sum(axis=0)):
|
||||
idx = np.nanargmin(dists)
|
||||
i, j = np.unravel_index(idx, dists.shape)
|
||||
dists[i, j] = np.nan
|
||||
corresp[i, j] = True
|
||||
# print("original/deskewed adjacency", corresp.nonzero())
|
||||
contours_only_text_parent_d_ordered = np.zeros_like(contours_only_text_parent)
|
||||
contours_only_text_parent_d_ordered = contours_only_text_parent_d[np.argmax(corresp, axis=1)]
|
||||
# img1 = np.zeros(text_only_d.shape[:2], dtype=np.uint8)
|
||||
# for i in range(len(contours_only_text_parent)):
|
||||
# cv2.fillPoly(img1, pts=[contours_only_text_parent_d_ordered[i]], color=i + 1)
|
||||
# plt.subplot(1, 4, 1, title="direct corresp contours")
|
||||
# plt.imshow(img1)
|
||||
# img2 = np.zeros(text_only_d.shape[:2], dtype=np.uint8)
|
||||
# join deskewed regions mapping to single original ones
|
||||
for i in range(len(contours_only_text_parent)):
|
||||
if np.count_nonzero(corresp[i]) > 1:
|
||||
indices = np.flatnonzero(corresp[i])
|
||||
# print("joining", indices)
|
||||
polygons_d = [contour2polygon(contour)
|
||||
for contour in contours_only_text_parent_d[indices]]
|
||||
contour_d = polygon2contour(join_polygons(polygons_d))
|
||||
contours_only_text_parent_d_ordered[i] = contour_d
|
||||
# cv2.fillPoly(img2, pts=[contour_d], color=i + 1)
|
||||
# plt.subplot(1, 4, 2, title="joined contours")
|
||||
# plt.imshow(img2)
|
||||
# img3 = np.zeros(text_only_d.shape[:2], dtype=np.uint8)
|
||||
# split deskewed regions mapping to multiple original ones
|
||||
def deskew(polygon):
|
||||
polygon = shapely.affinity.rotate(polygon, -slope_deskew, origin=center)
|
||||
#polygon = shapely.affinity.translate(polygon, *offset.squeeze())
|
||||
return polygon
|
||||
for j in range(len(contours_only_text_parent_d)):
|
||||
if np.count_nonzero(corresp[:, j]) > 1:
|
||||
indices = np.flatnonzero(corresp[:, j])
|
||||
# print("splitting along", indices)
|
||||
polygons = [deskew(contour2polygon(contour))
|
||||
for contour in contours_only_text_parent[indices]]
|
||||
polygon_d = contour2polygon(contours_only_text_parent_d[j])
|
||||
polygons_d = [make_intersection(polygon_d, polygon)
|
||||
for polygon in polygons]
|
||||
# ignore where there is no actual overlap
|
||||
indices = indices[np.flatnonzero(polygons_d)]
|
||||
contours_d = [polygon2contour(polygon_d)
|
||||
for polygon_d in polygons_d
|
||||
if polygon_d]
|
||||
contours_only_text_parent_d_ordered[indices] = contours_d
|
||||
# cv2.fillPoly(img3, pts=contours_d, color=j + 1)
|
||||
# plt.subplot(1, 4, 3, title="split contours")
|
||||
# plt.imshow(img3)
|
||||
# img4 = np.zeros(text_only_d.shape[:2], dtype=np.uint8)
|
||||
# for i in range(len(contours_only_text_parent)):
|
||||
# cv2.fillPoly(img4, pts=[contours_only_text_parent_d_ordered[i]], color=i + 1)
|
||||
# plt.subplot(1, 4, 4, title="result contours")
|
||||
# plt.imshow(img4)
|
||||
# plt.show()
|
||||
# from matplotlib import patches as ptchs
|
||||
# plt.subplot(1, 2, 1, title="undeskewed")
|
||||
# plt.imshow(text_only)
|
||||
# centers = np.stack(find_center_of_contours(contours_only_text_parent)) # [2, N]
|
||||
# for i in range(len(contours_only_text_parent)):
|
||||
# cnt = contours_only_text_parent[i]
|
||||
# ctr = centers[:, i]
|
||||
# plt.gca().add_patch(ptchs.Polygon(cnt[:, 0], closed=False, fill=False, color='blue'))
|
||||
# plt.gca().scatter(ctr[0], ctr[1], 20, c='blue', marker='x')
|
||||
# plt.gca().text(ctr[0], ctr[1], str(i), c='blue')
|
||||
# plt.subplot(1, 2, 2, title="deskewed")
|
||||
# plt.imshow(text_only_d)
|
||||
# centers_d = np.stack(find_center_of_contours(contours_only_text_parent_d_ordered)) # [2, N]
|
||||
# for i in range(len(contours_only_text_parent)):
|
||||
# cnt = contours_only_text_parent[i]
|
||||
# cnt = polygon2contour(deskew(contour2polygon(cnt)))
|
||||
# plt.gca().add_patch(ptchs.Polygon(cnt[:, 0], closed=False, fill=False, color='blue'))
|
||||
# for i in range(len(contours_only_text_parent_d_ordered)):
|
||||
# cnt = contours_only_text_parent_d_ordered[i]
|
||||
# ctr = centers_d[:, i]
|
||||
# plt.gca().add_patch(ptchs.Polygon(cnt[:, 0], closed=False, fill=False, color='red'))
|
||||
# plt.gca().scatter(ctr[0], ctr[1], 20, c='red', marker='x')
|
||||
# plt.gca().text(ctr[0], ctr[1], str(i), c='red')
|
||||
# plt.show()
|
||||
|
||||
if not len(contours_only_text_parent):
|
||||
# stop early
|
||||
# FIXME: Why not just (convert polygons_of_marginals to contours_only_text_parent and)
|
||||
# continue processing normally?
|
||||
# Why not (at least) split marginals left vs right and get textlines?
|
||||
empty_marginals = [[]] * len(polygons_of_marginals)
|
||||
if self.full_layout:
|
||||
pcgts = writer.build_pagexml_full_layout(
|
||||
found_polygons_text_region=[],
|
||||
found_polygons_text_region_h=[],
|
||||
page_coord=page_coord,
|
||||
order_of_texts=[],
|
||||
all_found_textline_polygons=[],
|
||||
all_found_textline_polygons_h=[],
|
||||
all_box_coord=[],
|
||||
all_box_coord_h=[],
|
||||
found_polygons_images=polygons_of_images,
|
||||
found_polygons_tables=contours_tables,
|
||||
found_polygons_drop_capitals=[],
|
||||
found_polygons_marginals_left=polygons_of_marginals,
|
||||
found_polygons_marginals_right=polygons_of_marginals,
|
||||
all_found_textline_polygons_marginals_left=empty_marginals,
|
||||
all_found_textline_polygons_marginals_right=empty_marginals,
|
||||
all_box_coord_marginals_left=empty_marginals,
|
||||
all_box_coord_marginals_right=empty_marginals,
|
||||
slopes=[],
|
||||
slopes_h=[],
|
||||
slopes_marginals_left=[],
|
||||
slopes_marginals_right=[],
|
||||
cont_page=cont_page,
|
||||
polygons_seplines=polygons_seplines,
|
||||
)
|
||||
polygons_of_textregions_d = return_contours_of_interested_region(text_regions_p_d, label_text,
|
||||
min_area=MIN_AREA_REGION)
|
||||
if (len(polygons_of_textregions) and
|
||||
len(polygons_of_textregions_d)):
|
||||
polygons_of_textregions_d = \
|
||||
match_deskewed_contours(
|
||||
slope_deskew,
|
||||
polygons_of_textregions,
|
||||
polygons_of_textregions_d,
|
||||
text_regions_p.shape,
|
||||
text_regions_p_d.shape)
|
||||
else:
|
||||
pcgts = writer.build_pagexml_no_full_layout(
|
||||
found_polygons_text_region=[],
|
||||
page_coord=page_coord,
|
||||
order_of_texts=[],
|
||||
all_found_textline_polygons=[],
|
||||
all_box_coord=[],
|
||||
found_polygons_images=polygons_of_images,
|
||||
found_polygons_tables=contours_tables,
|
||||
found_polygons_marginals_left=polygons_of_marginals,
|
||||
found_polygons_marginals_right=polygons_of_marginals,
|
||||
all_found_textline_polygons_marginals_left=empty_marginals,
|
||||
all_found_textline_polygons_marginals_right=empty_marginals,
|
||||
all_box_coord_marginals_left=empty_marginals,
|
||||
all_box_coord_marginals_right=empty_marginals,
|
||||
slopes=[],
|
||||
slopes_marginals_left=[],
|
||||
slopes_marginals_right=[],
|
||||
cont_page=cont_page,
|
||||
polygons_seplines=polygons_seplines,
|
||||
)
|
||||
writer.write_pagexml(pcgts)
|
||||
self.logger.info("Job done in %.1fs", time.time() - t0)
|
||||
return
|
||||
|
||||
#print("text region early 3 in %.1fs", time.time() - t0)
|
||||
contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent)
|
||||
contours_only_text_parent , contours_only_text_parent_d_ordered = self.filter_contours_inside_a_bigger_one(
|
||||
contours_only_text_parent, contours_only_text_parent_d_ordered, text_only,
|
||||
polygons_of_textregions_d = []
|
||||
#print("text region early 2 in %.1fs", time.time() - t0)
|
||||
(polygons_of_textregions,
|
||||
polygons_of_textregions_d) = self.filter_contours_inside_a_bigger_one(
|
||||
polygons_of_textregions,
|
||||
polygons_of_textregions_d,
|
||||
text_regions_p.shape,
|
||||
marginal_cnts=polygons_of_marginals)
|
||||
#print("text region early 3.5 in %.1fs", time.time() - t0)
|
||||
conf_textregions = get_region_confidences(contours_only_text_parent, regions_confidence)
|
||||
#contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent)
|
||||
#print("text region early 3 in %.1fs", time.time() - t0)
|
||||
polygons_of_textregions = dilate_textregion_contours(polygons_of_textregions)
|
||||
conf_textregions = get_region_confidences(polygons_of_textregions, regions_confidence)
|
||||
|
||||
if not len(polygons_of_textregions):
|
||||
polygons_of_textregions = polygons_of_marginals
|
||||
polygons_of_marginals = []
|
||||
conf_textregions = conf_marginals
|
||||
conf_marginals = []
|
||||
|
||||
#print("text region early 4 in %.1fs", time.time() - t0)
|
||||
boxes_text = get_text_region_boxes_by_given_contours(contours_only_text_parent)
|
||||
boxes_text = get_text_region_boxes_by_given_contours(polygons_of_textregions)
|
||||
boxes_marginals = get_text_region_boxes_by_given_contours(polygons_of_marginals)
|
||||
#print("text region early 5 in %.1fs", time.time() - t0)
|
||||
## birdan sora chock chakir
|
||||
if not self.curved_line:
|
||||
all_found_textline_polygons, \
|
||||
all_box_coord, slopes = self.get_slopes_and_deskew_new_light2(
|
||||
contours_only_text_parent, textline_mask_tot_ea_org,
|
||||
polygons_of_textregions, textline_mask_tot_ea_org,
|
||||
boxes_text, slope_deskew)
|
||||
all_found_textline_polygons_marginals, \
|
||||
all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new_light2(
|
||||
|
|
@ -2764,28 +2561,28 @@ class Eynollah:
|
|||
all_found_textline_polygons = dilate_textline_contours(
|
||||
all_found_textline_polygons)
|
||||
all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
|
||||
all_found_textline_polygons, None, textline_mask_tot_ea_org, type_contour="textline")
|
||||
all_found_textline_polygons, None, None, type_contour="textline")
|
||||
all_found_textline_polygons_marginals = dilate_textline_contours(
|
||||
all_found_textline_polygons_marginals)
|
||||
contours_only_text_parent, all_found_textline_polygons, \
|
||||
contours_only_text_parent_d_ordered, conf_textregions = \
|
||||
polygons_of_textregions, all_found_textline_polygons, \
|
||||
polygons_of_textregions_d, conf_textregions = \
|
||||
self.filter_contours_without_textline_inside(
|
||||
contours_only_text_parent, all_found_textline_polygons,
|
||||
contours_only_text_parent_d_ordered, conf_textregions)
|
||||
polygons_of_textregions, all_found_textline_polygons,
|
||||
polygons_of_textregions_d, conf_textregions)
|
||||
else:
|
||||
scale_param = 1
|
||||
textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2)
|
||||
all_found_textline_polygons, \
|
||||
all_box_coord, slopes = self.get_slopes_and_deskew_new_curved(
|
||||
contours_only_text_parent, textline_mask_tot_ea_erode,
|
||||
boxes_text, text_only,
|
||||
polygons_of_textregions, textline_mask_tot_ea_erode,
|
||||
boxes_text,
|
||||
num_col_classifier, scale_param, slope_deskew, image['name'])
|
||||
all_found_textline_polygons = small_textlines_to_parent_adherence2(
|
||||
all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier)
|
||||
all_found_textline_polygons_marginals, \
|
||||
all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new_curved(
|
||||
polygons_of_marginals, textline_mask_tot_ea_erode,
|
||||
boxes_marginals, text_only,
|
||||
boxes_marginals,
|
||||
num_col_classifier, scale_param, slope_deskew, image['name'])
|
||||
all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(
|
||||
all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)
|
||||
|
|
@ -2813,32 +2610,32 @@ class Eynollah:
|
|||
|
||||
if self.full_layout:
|
||||
(text_regions_p,
|
||||
contours_only_text_parent,
|
||||
contours_only_text_parent_h,
|
||||
polygons_of_textregions,
|
||||
polygons_of_textregions_h,
|
||||
polygons_of_textregions_d,
|
||||
polygons_of_textregions_h_d,
|
||||
all_box_coord,
|
||||
all_box_coord_h,
|
||||
all_found_textline_polygons,
|
||||
all_found_textline_polygons_h,
|
||||
slopes,
|
||||
slopes_h,
|
||||
contours_only_text_parent_d_ordered,
|
||||
contours_only_text_parent_h_d_ordered,
|
||||
conf_textregions,
|
||||
conf_textregions_h) = split_textregion_main_vs_head(
|
||||
text_regions_p,
|
||||
regions_fully,
|
||||
contours_only_text_parent,
|
||||
polygons_of_textregions,
|
||||
polygons_of_textregions_d,
|
||||
all_box_coord,
|
||||
all_found_textline_polygons,
|
||||
slopes,
|
||||
contours_only_text_parent_d_ordered,
|
||||
conf_textregions)
|
||||
|
||||
if self.plotter:
|
||||
self.plotter.save_plot_of_layout(text_regions_p, image_page, image['name'])
|
||||
self.plotter.save_plot_of_layout_all(text_regions_p, image_page, image['name'])
|
||||
##all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(
|
||||
##text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h,
|
||||
##text_regions_p, polygons_of_drop_capitals, polygons_of_textregions, polygons_of_textregions_h,
|
||||
##all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h,
|
||||
##kernel=KERNEL, curved_line=self.curved_line)
|
||||
|
||||
|
|
@ -2847,11 +2644,11 @@ class Eynollah:
|
|||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||
_, _, matrix_of_seps_ch, splitter_y_new, _ = find_number_of_columns_in_document(
|
||||
text_regions_p, num_col_classifier, self.tables, label_seps,
|
||||
contours_h=None if self.headers_off else contours_only_text_parent_h)
|
||||
contours_h=None if self.headers_off else polygons_of_textregions_h)
|
||||
else:
|
||||
_, _, matrix_of_seps_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
|
||||
text_regions_p_d, num_col_classifier, self.tables, label_seps,
|
||||
contours_h=None if self.headers_off else contours_only_text_parent_h_d_ordered)
|
||||
contours_h=None if self.headers_off else polygons_of_textregions_h_d)
|
||||
|
||||
if not erosion_hurts:
|
||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||
|
|
@ -2875,8 +2672,8 @@ class Eynollah:
|
|||
logger=self.logger)
|
||||
else:
|
||||
polygons_of_drop_capitals = []
|
||||
contours_only_text_parent_h = []
|
||||
contours_only_text_parent_h_d_ordered = []
|
||||
polygons_of_textregions_h = []
|
||||
polygons_of_textregions_h_d = []
|
||||
|
||||
if self.plotter:
|
||||
self.plotter.write_images_into_directory(polygons_of_images, image_page,
|
||||
|
|
@ -2894,21 +2691,21 @@ class Eynollah:
|
|||
|
||||
if self.reading_order_machine_based:
|
||||
order_text_new = self.do_order_of_regions_with_model(
|
||||
contours_only_text_parent,
|
||||
contours_only_text_parent_h,
|
||||
polygons_of_textregions,
|
||||
polygons_of_textregions_h,
|
||||
polygons_of_drop_capitals,
|
||||
text_regions_p)
|
||||
else:
|
||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||
order_text_new = self.do_order_of_regions(
|
||||
contours_only_text_parent,
|
||||
contours_only_text_parent_h,
|
||||
polygons_of_textregions,
|
||||
polygons_of_textregions_h,
|
||||
polygons_of_drop_capitals,
|
||||
boxes, textline_mask_tot_ea)
|
||||
else:
|
||||
order_text_new = self.do_order_of_regions(
|
||||
contours_only_text_parent_d_ordered,
|
||||
contours_only_text_parent_h_d_ordered,
|
||||
polygons_of_textregions_d,
|
||||
polygons_of_textregions_h_d,
|
||||
polygons_of_drop_capitals,
|
||||
boxes_d, textline_mask_tot_ea_d)
|
||||
self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s")
|
||||
|
|
@ -2917,8 +2714,8 @@ class Eynollah:
|
|||
|
||||
if self.full_layout:
|
||||
pcgts = writer.build_pagexml_full_layout(
|
||||
found_polygons_text_region=contours_only_text_parent,
|
||||
found_polygons_text_region_h=contours_only_text_parent_h,
|
||||
found_polygons_text_region=polygons_of_textregions,
|
||||
found_polygons_text_region_h=polygons_of_textregions_h,
|
||||
page_coord=page_coord,
|
||||
order_of_texts=order_text_new,
|
||||
all_found_textline_polygons=all_found_textline_polygons,
|
||||
|
|
@ -2950,7 +2747,7 @@ class Eynollah:
|
|||
)
|
||||
else:
|
||||
pcgts = writer.build_pagexml_no_full_layout(
|
||||
found_polygons_text_region=contours_only_text_parent,
|
||||
found_polygons_text_region=polygons_of_textregions,
|
||||
page_coord=page_coord,
|
||||
order_of_texts=order_text_new,
|
||||
all_found_textline_polygons=all_found_textline_polygons,
|
||||
|
|
|
|||
|
|
@ -883,12 +883,14 @@ def check_any_text_region_in_model_one_is_main_or_header(
|
|||
conf_contours_head)
|
||||
|
||||
def split_textregion_main_vs_head(
|
||||
regions_model_1, regions_model_full,
|
||||
contours_only_text_parent,
|
||||
all_box_coord, all_found_textline_polygons,
|
||||
regions_model_1,
|
||||
regions_model_full,
|
||||
polygons_of_textregions,
|
||||
polygons_of_textregions_d,
|
||||
all_box_coord,
|
||||
all_found_textline_polygons,
|
||||
slopes,
|
||||
contours_only_text_parent_d_ordered,
|
||||
conf_contours,
|
||||
conf_textregions,
|
||||
label_text=1,
|
||||
label_head_full=2,
|
||||
label_head_final=2,
|
||||
|
|
@ -907,35 +909,19 @@ def split_textregion_main_vs_head(
|
|||
(regions_model_full.shape[1] // zoom,
|
||||
regions_model_full.shape[0] // zoom),
|
||||
interpolation=cv2.INTER_NEAREST)
|
||||
contours_only_text_parent_z = [contour // zoom
|
||||
for contour in contours_only_text_parent]
|
||||
contours_z = [contour // zoom
|
||||
for contour in polygons_of_textregions]
|
||||
|
||||
###
|
||||
_, _, x_min_main, x_max_main, y_min_main, y_max_main, _ = \
|
||||
find_new_features_of_contours(contours_only_text_parent_z)
|
||||
find_new_features_of_contours(contours_z)
|
||||
|
||||
length_con=x_max_main-x_min_main
|
||||
height_con=y_max_main-y_min_main
|
||||
|
||||
all_found_textline_polygons_main=[]
|
||||
all_found_textline_polygons_head=[]
|
||||
|
||||
all_box_coord_main=[]
|
||||
all_box_coord_head=[]
|
||||
|
||||
slopes_main=[]
|
||||
slopes_head=[]
|
||||
|
||||
contours_only_text_parent_main=[]
|
||||
contours_only_text_parent_head=[]
|
||||
|
||||
conf_contours_main=[]
|
||||
conf_contours_head=[]
|
||||
|
||||
contours_only_text_parent_main_d=[]
|
||||
contours_only_text_parent_head_d=[]
|
||||
|
||||
for ii, con in enumerate(contours_only_text_parent_z):
|
||||
main = []
|
||||
head = []
|
||||
for ii, con in enumerate(contours_z):
|
||||
parent = np.zeros_like(regions_model_1)
|
||||
parent = cv2.fillPoly(parent, pts=[con], color=1)
|
||||
|
||||
|
|
@ -948,24 +934,14 @@ def split_textregion_main_vs_head(
|
|||
( pixels_head >= 0.3 * pixels_main and
|
||||
length_con[ii] >= 3 * height_con[ii] )):
|
||||
|
||||
regions_model_1[(regions_model_1 == label_text) & (parent > 0)] = label_head_final
|
||||
contours_only_text_parent_head.append(contours_only_text_parent[ii])
|
||||
conf_contours_head.append(conf_contours[ii])
|
||||
if len(contours_only_text_parent_d_ordered):
|
||||
contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii])
|
||||
all_box_coord_head.append(all_box_coord[ii])
|
||||
slopes_head.append(slopes[ii])
|
||||
all_found_textline_polygons_head.append(all_found_textline_polygons[ii])
|
||||
head.append(ii)
|
||||
label = label_head_final
|
||||
|
||||
else:
|
||||
regions_model_1[(regions_model_1 == label_text) & (parent > 0)] = label_main_final
|
||||
contours_only_text_parent_main.append(contours_only_text_parent[ii])
|
||||
conf_contours_main.append(conf_contours[ii])
|
||||
if len(contours_only_text_parent_d_ordered):
|
||||
contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii])
|
||||
all_box_coord_main.append(all_box_coord[ii])
|
||||
slopes_main.append(slopes[ii])
|
||||
all_found_textline_polygons_main.append(all_found_textline_polygons[ii])
|
||||
main.append(ii)
|
||||
label = label_main_final
|
||||
|
||||
regions_model_1[(regions_model_1 == label_text) & (parent > 0)] = label
|
||||
|
||||
### to make it faster
|
||||
regions_model_1 = cv2.resize(regions_model_1, (w_o, h_o), interpolation=cv2.INTER_NEAREST)
|
||||
|
|
@ -974,19 +950,25 @@ def split_textregion_main_vs_head(
|
|||
# interpolation=cv2.INTER_NEAREST)
|
||||
###
|
||||
|
||||
def select(lis, indexes):
|
||||
if not len(lis):
|
||||
return []
|
||||
return [lis[ind] for ind in indexes]
|
||||
|
||||
return (regions_model_1,
|
||||
contours_only_text_parent_main,
|
||||
contours_only_text_parent_head,
|
||||
all_box_coord_main,
|
||||
all_box_coord_head,
|
||||
all_found_textline_polygons_main,
|
||||
all_found_textline_polygons_head,
|
||||
slopes_main,
|
||||
slopes_head,
|
||||
contours_only_text_parent_main_d,
|
||||
contours_only_text_parent_head_d,
|
||||
conf_contours_main,
|
||||
conf_contours_head)
|
||||
select(polygons_of_textregions, main),
|
||||
select(polygons_of_textregions, head),
|
||||
select(polygons_of_textregions_d, main),
|
||||
select(polygons_of_textregions_d, head),
|
||||
select(all_box_coord, main),
|
||||
select(all_box_coord, head),
|
||||
select(all_found_textline_polygons, main),
|
||||
select(all_found_textline_polygons, head),
|
||||
select(slopes, main),
|
||||
select(slopes, head),
|
||||
select(conf_textregions, main),
|
||||
select(conf_textregions, head),
|
||||
)
|
||||
|
||||
def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col):
|
||||
# print(textlines_con)
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import numpy as np
|
|||
from scipy.sparse.csgraph import minimum_spanning_tree
|
||||
from shapely.geometry import Polygon, LineString
|
||||
from shapely.geometry.polygon import orient
|
||||
from shapely import set_precision
|
||||
from shapely import set_precision, affinity
|
||||
from shapely.ops import unary_union, nearest_points
|
||||
|
||||
from .rotate import rotate_image, rotation_image_new
|
||||
|
|
@ -106,8 +106,7 @@ def return_parent_contours(contours, hierarchy):
|
|||
if hierarchy[0][i][3] == -1]
|
||||
return contours_parent
|
||||
|
||||
def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002):
|
||||
# pixels of images are identified by 5
|
||||
def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002, dilate=0):
|
||||
if region_pre_p.ndim == 3:
|
||||
mask = (region_pre_p[:, :, 0] == label).astype(np.uint8)
|
||||
else:
|
||||
|
|
@ -116,7 +115,9 @@ def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002):
|
|||
contours_imgs, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
contours_imgs = return_parent_contours(contours_imgs, hierarchy)
|
||||
contours_imgs = filter_contours_area_of_image_tables(mask, contours_imgs, hierarchy,
|
||||
max_area=1, min_area=min_area)
|
||||
max_area=1,
|
||||
min_area=min_area,
|
||||
dilate=dilate)
|
||||
return contours_imgs
|
||||
|
||||
def do_work_of_contours_in_image(contour, index_r_con, img, slope_first):
|
||||
|
|
@ -260,6 +261,141 @@ def dilate_textregion_contours(all_found_textregion_polygons):
|
|||
[polygon2contour(contour2polygon(contour, dilate=6))
|
||||
for contour in all_found_textregion_polygons])
|
||||
|
||||
def match_deskewed_contours(slope_deskew, contours_o, contours_d, shape_o, shape_d):
    """Reorder the deskewed contours so that they line up with the original ones.

    Both contour sets are sorted by relative area; the largest original
    contour is paired with the nearest of the (up to) five largest deskewed
    contours to estimate a translation offset.  The original centers are then
    rotated by ``slope_deskew``, shifted by that offset, and matched greedily
    (closest pair first, in center-plus-relative-area space) until every
    original and every deskewed contour has at least one partner.  Deskewed
    contours sharing one original are joined; a deskewed contour shared by
    several originals is split by intersecting it with the rotated originals.

    Args:
        slope_deskew: rotation angle, passed to ``cv2.getRotationMatrix2D``
            (and negated for ``shapely.affinity.rotate``).
        contours_o: contours found in the original (undeskewed) image.
        contours_d: contours found in the deskewed image.
        shape_o: shape of the original image; the first two entries are
            read as ``(height, width)``.
        shape_d: shape of the deskewed image; the first two entries are
            read as ``(height, width)``.

    Returns:
        An array with one deskewed contour per entry of ``contours_o``, in
        the same order as ``contours_o`` was passed in.

    NOTE(review): assumes both contour lists are non-empty (``np.argmin`` /
    ``np.nanargmin`` below fail on empty input) - confirm callers guard this.
    """
    from . import ensure_array

    # contour areas as a fraction of the respective image area
    cntareas_o = np.array([cv2.contourArea(contour) for contour in contours_o])
    cntareas_d = np.array([cv2.contourArea(contour) for contour in contours_d])
    cntareas_o = cntareas_o / float(np.prod(shape_o[:2]))
    cntareas_d = cntareas_d / float(np.prod(shape_d[:2]))

    contours_o = ensure_array(contours_o)
    contours_d = ensure_array(contours_d)

    # sort both sides by ascending area, so the largest contour comes last
    sort_o = np.argsort(cntareas_o)
    sort_d = np.argsort(cntareas_d)
    contours_o = contours_o[sort_o]
    contours_d = contours_d[sort_d]
    cntareas_o = cntareas_o[sort_o]
    cntareas_d = cntareas_d[sort_d]

    centers_o = np.stack(find_center_of_contours(contours_o)) # [2, N]
    centers_d = np.stack(find_center_of_contours(contours_d)) # [2, M]
    center0_o = centers_o[:, -1:] # [2, 1]

    # find the largest among the largest 5 deskewed contours
    # that is also closest to the largest original contour
    last5_centers_d = centers_d[:, -5:]
    dists_d = np.linalg.norm(center0_o - last5_centers_d, axis=0)
    ind_largest = len(contours_d) - last5_centers_d.shape[1] + np.argmin(dists_d)
    # Take a copy: writing the anchor into a view of `centers_d` (as done
    # previously) would overwrite the center of the largest deskewed contour
    # and thereby distort the distance matrix computed below.
    center0_d = centers_d[:, ind_largest: ind_largest + 1].copy() # [2, 1]

    # order new contours the same way as the undeskewed contours
    # (by calculating the offset of the largest contours, respectively,
    #  of the new and undeskewed image; then for each contour,
    #  finding the closest new contour, with proximity calculated
    #  as distance of their centers modulo offset vector)
    h_o, w_o = shape_o[:2]
    center_o = (w_o // 2, h_o // 2)
    M = cv2.getRotationMatrix2D(center_o, slope_deskew, 1.0)
    M_22 = np.array(M)[:2, :2]
    center0_o = np.dot(M_22, center0_o) # [2, 1]
    offset = center0_o - center0_d # [2, 1]

    centers_o = np.dot(M_22, centers_o) - offset # [2, N]
    # add dimension for area (so only contours of similar size will be considered close)
    centers_o = np.append(centers_o, cntareas_o[np.newaxis], axis=0) # [3, N]
    centers_d = np.append(centers_d, cntareas_d[np.newaxis], axis=0) # [3, M]

    # pairwise distances between every original and every deskewed contour
    dists = np.linalg.norm(centers_o[:, :, np.newaxis] -
                           centers_d[:, np.newaxis, :], axis=0) # [N, M]
    corresp = np.zeros(dists.shape, dtype=bool)
    # keep searching next-closest until at least one correspondence on each side
    while not (corresp.any(axis=1).all() and corresp.any(axis=0).all()):
        idx = np.nanargmin(dists)
        i, j = np.unravel_index(idx, dists.shape)
        dists[i, j] = np.nan  # consumed: never pick this pair again
        corresp[i, j] = True

    # first guess: one matching deskewed contour per original contour
    # (fancy indexing copies, so `contours_d` itself stays untouched)
    contours_d_ordered = contours_d[np.argmax(corresp, axis=1)]

    # join deskewed regions mapping to single original ones
    for i, matches in enumerate(corresp):
        if np.count_nonzero(matches) > 1:
            indices = np.flatnonzero(matches)
            polygons_d = [contour2polygon(contour)
                          for contour in contours_d[indices]]
            contours_d_ordered[i] = polygon2contour(join_polygons(polygons_d))

    # split deskewed regions mapping to multiple original ones
    def deskew(polygon):
        # NOTE(review): a translation by `offset` was commented out in the
        # original code; only the rotation is applied here.
        return affinity.rotate(polygon, -slope_deskew, origin=center_o)

    for j in range(len(contours_d)):
        if np.count_nonzero(corresp[:, j]) > 1:
            indices = np.flatnonzero(corresp[:, j])
            polygons_o = [deskew(contour2polygon(contour))
                          for contour in contours_o[indices]]
            polygon_d = contour2polygon(contours_d[j])
            polygons_d = [make_intersection(polygon_d, polygon)
                          for polygon in polygons_o]
            # ignore where there is no actual overlap; assign one by one so
            # that numpy never tries to broadcast the list of contour arrays
            for ind, polygon in zip(indices, polygons_d):
                if polygon:
                    contours_d_ordered[ind] = polygon2contour(polygon)

    # undo the area sorting, so the result is aligned with the input order
    invsort_o = np.argsort(sort_o)
    return contours_d_ordered[invsort_o]
|
||||
|
||||
def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0):
|
||||
polygon = Polygon([point[0] for point in contour])
|
||||
if dilate:
|
||||
|
|
|
|||
|
|
@ -1584,7 +1584,7 @@ def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, name=No
|
|||
|
||||
def do_work_of_slopes_new_curved(
|
||||
box_text, contour_par,
|
||||
textline_mask_tot_ea=None, mask_texts_only=None,
|
||||
textline_mask_tot_ea=None,
|
||||
num_col=1, scale_par=1.0, slope_deskew=0.0,
|
||||
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None, name=None
|
||||
):
|
||||
|
|
@ -1633,7 +1633,7 @@ def do_work_of_slopes_new_curved(
|
|||
if abs(slope_for_all) < 45:
|
||||
textline_region_in_image = np.zeros(textline_mask_tot_ea.shape)
|
||||
x, y, w, h = cv2.boundingRect(contour_par)
|
||||
mask_biggest = np.zeros(mask_texts_only.shape)
|
||||
mask_biggest = np.zeros(textline_mask_tot_ea.shape)
|
||||
mask_biggest = cv2.fillPoly(mask_biggest, pts=[contour_par], color=(1, 1, 1))
|
||||
mask_region_in_patch_region = mask_biggest[y : y + h, x : x + w]
|
||||
textline_biggest_region = mask_biggest * textline_mask_tot_ea
|
||||
|
|
@ -1653,7 +1653,7 @@ def do_work_of_slopes_new_curved(
|
|||
|
||||
textlines_cnt_per_region = []
|
||||
for jjjj in range(len(cnt_textlines_in_image)):
|
||||
mask_biggest2 = np.zeros(mask_texts_only.shape)
|
||||
mask_biggest2 = np.zeros(textline_mask_tot_ea.shape)
|
||||
mask_biggest2 = cv2.fillPoly(mask_biggest2, pts=[cnt_textlines_in_image[jjjj]], color=(1, 1, 1))
|
||||
if num_col + 1 == 1:
|
||||
mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=5)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue