mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-15 19:09:58 +02:00
do_order_of_regions: simplify
- avoid loops in favour of array processing
This commit is contained in:
parent
7387f5a929
commit
e9bb62bd86
1 changed file with 64 additions and 94 deletions
|
@@ -2526,7 +2526,7 @@ class Eynollah:
|
||||||
contours_only_text_parent_h)
|
contours_only_text_parent_h)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
arg_text_con = []
|
arg_text_con_main = np.zeros(len(contours_only_text_parent), dtype=int)
|
||||||
for ii in range(len(contours_only_text_parent)):
|
for ii in range(len(contours_only_text_parent)):
|
||||||
check_if_textregion_located_in_a_box = False
|
check_if_textregion_located_in_a_box = False
|
||||||
for jj, box in enumerate(boxes):
|
for jj, box in enumerate(boxes):
|
||||||
|
@@ -2534,7 +2534,7 @@ class Eynollah:
|
||||||
Mx_main[ii] < box[1] and
|
Mx_main[ii] < box[1] and
|
||||||
my_main[ii] >= box[2] and
|
my_main[ii] >= box[2] and
|
||||||
My_main[ii] < box[3]):
|
My_main[ii] < box[3]):
|
||||||
arg_text_con.append(jj)
|
arg_text_con_main[ii] = jj
|
||||||
check_if_textregion_located_in_a_box = True
|
check_if_textregion_located_in_a_box = True
|
||||||
break
|
break
|
||||||
if not check_if_textregion_located_in_a_box:
|
if not check_if_textregion_located_in_a_box:
|
||||||
|
@@ -2545,11 +2545,11 @@ class Eynollah:
|
||||||
pcontained_in_box = ((boxes[:, 2] <= cy_main[ii]) & (cy_main[ii] < boxes[:, 3]) &
|
pcontained_in_box = ((boxes[:, 2] <= cy_main[ii]) & (cy_main[ii] < boxes[:, 3]) &
|
||||||
(boxes[:, 0] <= cx_main[ii]) & (cx_main[ii] < boxes[:, 1]))
|
(boxes[:, 0] <= cx_main[ii]) & (cx_main[ii] < boxes[:, 1]))
|
||||||
ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
|
ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
|
||||||
arg_text_con.append(ind_min)
|
arg_text_con_main[ii] = ind_min
|
||||||
args_contours = np.arange(len(arg_text_con))
|
args_contours_main = np.arange(len(contours_only_text_parent))
|
||||||
order_by_con_main = np.zeros(len(arg_text_con))
|
order_by_con_main = np.zeros_like(arg_text_con_main)
|
||||||
|
|
||||||
arg_text_con_h = []
|
arg_text_con_head = np.zeros(len(contours_only_text_parent_h), dtype=int)
|
||||||
for ii in range(len(contours_only_text_parent_h)):
|
for ii in range(len(contours_only_text_parent_h)):
|
||||||
check_if_textregion_located_in_a_box = False
|
check_if_textregion_located_in_a_box = False
|
||||||
for jj, box in enumerate(boxes):
|
for jj, box in enumerate(boxes):
|
||||||
|
@@ -2557,7 +2557,7 @@ class Eynollah:
|
||||||
Mx_head[ii] < box[1] and
|
Mx_head[ii] < box[1] and
|
||||||
my_head[ii] >= box[2] and
|
my_head[ii] >= box[2] and
|
||||||
My_head[ii] < box[3]):
|
My_head[ii] < box[3]):
|
||||||
arg_text_con_h.append(jj)
|
arg_text_con_head[ii] = jj
|
||||||
check_if_textregion_located_in_a_box = True
|
check_if_textregion_located_in_a_box = True
|
||||||
break
|
break
|
||||||
if not check_if_textregion_located_in_a_box:
|
if not check_if_textregion_located_in_a_box:
|
||||||
|
@@ -2568,9 +2568,9 @@ class Eynollah:
|
||||||
pcontained_in_box = ((boxes[:, 2] <= cy_head[ii]) & (cy_head[ii] < boxes[:, 3]) &
|
pcontained_in_box = ((boxes[:, 2] <= cy_head[ii]) & (cy_head[ii] < boxes[:, 3]) &
|
||||||
(boxes[:, 0] <= cx_head[ii]) & (cx_head[ii] < boxes[:, 1]))
|
(boxes[:, 0] <= cx_head[ii]) & (cx_head[ii] < boxes[:, 1]))
|
||||||
ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
|
ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
|
||||||
arg_text_con_h.append(ind_min)
|
arg_text_con_head[ii] = ind_min
|
||||||
args_contours_h = np.arange(len(arg_text_con_h))
|
args_contours_head = np.arange(len(contours_only_text_parent_h))
|
||||||
order_by_con_head = np.zeros(len(arg_text_con_h))
|
order_by_con_head = np.zeros_like(arg_text_con_head)
|
||||||
|
|
||||||
ref_point = 0
|
ref_point = 0
|
||||||
order_of_texts_tot = []
|
order_of_texts_tot = []
|
||||||
|
@@ -2578,10 +2578,10 @@ class Eynollah:
|
||||||
for iij, box in enumerate(boxes):
|
for iij, box in enumerate(boxes):
|
||||||
ys = slice(*box[2:4])
|
ys = slice(*box[2:4])
|
||||||
xs = slice(*box[0:2])
|
xs = slice(*box[0:2])
|
||||||
args_contours_box = args_contours[np.array(arg_text_con) == iij]
|
args_contours_box_main = args_contours_main[arg_text_con_main == iij]
|
||||||
args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij]
|
args_contours_box_head = args_contours_head[arg_text_con_head == iij]
|
||||||
con_inter_box = contours_only_text_parent[args_contours_box]
|
con_inter_box = contours_only_text_parent[args_contours_box_main]
|
||||||
con_inter_box_h = contours_only_text_parent_h[args_contours_box_h]
|
con_inter_box_h = contours_only_text_parent_h[args_contours_box_head]
|
||||||
|
|
||||||
indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
||||||
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2])
|
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2])
|
||||||
|
@@ -2595,14 +2595,14 @@ class Eynollah:
|
||||||
indexes_sorted_head = indexes_sorted[kind_of_texts_sorted == 2]
|
indexes_sorted_head = indexes_sorted[kind_of_texts_sorted == 2]
|
||||||
indexes_by_type_head = index_by_kind_sorted[kind_of_texts_sorted == 2]
|
indexes_by_type_head = index_by_kind_sorted[kind_of_texts_sorted == 2]
|
||||||
|
|
||||||
for zahler, _ in enumerate(args_contours_box):
|
for zahler, _ in enumerate(args_contours_box_main):
|
||||||
arg_order_v = indexes_sorted_main[zahler]
|
arg_order_v = indexes_sorted_main[zahler]
|
||||||
order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \
|
order_by_con_main[args_contours_box_main[indexes_by_type_main[zahler]]] = \
|
||||||
np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
|
np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
|
||||||
|
|
||||||
for zahler, _ in enumerate(args_contours_box_h):
|
for zahler, _ in enumerate(args_contours_box_head):
|
||||||
arg_order_v = indexes_sorted_head[zahler]
|
arg_order_v = indexes_sorted_head[zahler]
|
||||||
order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \
|
order_by_con_head[args_contours_box_head[indexes_by_type_head[zahler]]] = \
|
||||||
np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
|
np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
|
||||||
|
|
||||||
for jji in range(len(id_of_texts)):
|
for jji in range(len(id_of_texts)):
|
||||||
|
@@ -2610,20 +2610,13 @@ class Eynollah:
|
||||||
id_of_texts_tot.append(id_of_texts[jji])
|
id_of_texts_tot.append(id_of_texts[jji])
|
||||||
ref_point += len(id_of_texts)
|
ref_point += len(id_of_texts)
|
||||||
|
|
||||||
order_of_texts_tot = []
|
order_of_texts_tot = np.concatenate((order_by_con_main,
|
||||||
for tj1 in range(len(contours_only_text_parent)):
|
order_by_con_head))
|
||||||
order_of_texts_tot.append(int(order_by_con_main[tj1]))
|
order_text_new = np.argsort(order_of_texts_tot)
|
||||||
|
|
||||||
for tj1 in range(len(contours_only_text_parent_h)):
|
|
||||||
order_of_texts_tot.append(int(order_by_con_head[tj1]))
|
|
||||||
|
|
||||||
order_text_new = []
|
|
||||||
for iii in range(len(order_of_texts_tot)):
|
|
||||||
order_text_new.append(np.flatnonzero(np.array(order_of_texts_tot) == iii))
|
|
||||||
|
|
||||||
except Exception as why:
|
except Exception as why:
|
||||||
self.logger.error(why)
|
self.logger.error(why)
|
||||||
arg_text_con = []
|
arg_text_con_main = np.zeros(len(contours_only_text_parent), dtype=int)
|
||||||
for ii in range(len(contours_only_text_parent)):
|
for ii in range(len(contours_only_text_parent)):
|
||||||
check_if_textregion_located_in_a_box = False
|
check_if_textregion_located_in_a_box = False
|
||||||
for jj, box in enumerate(boxes):
|
for jj, box in enumerate(boxes):
|
||||||
|
@@ -2632,10 +2625,9 @@ class Eynollah:
|
||||||
cy_main[ii] >= box[2] and
|
cy_main[ii] >= box[2] and
|
||||||
cy_main[ii] < box[3]):
|
cy_main[ii] < box[3]):
|
||||||
# this is valid if the center of region identify in which box it is located
|
# this is valid if the center of region identify in which box it is located
|
||||||
arg_text_con.append(jj)
|
arg_text_con_main[ii] = jj
|
||||||
check_if_textregion_located_in_a_box = True
|
check_if_textregion_located_in_a_box = True
|
||||||
break
|
break
|
||||||
|
|
||||||
if not check_if_textregion_located_in_a_box:
|
if not check_if_textregion_located_in_a_box:
|
||||||
# dists_tr_from_box = [math.sqrt((cx_main[ii] - 0.5 * box[1] - 0.5 * box[0]) ** 2 +
|
# dists_tr_from_box = [math.sqrt((cx_main[ii] - 0.5 * box[1] - 0.5 * box[0]) ** 2 +
|
||||||
# (cy_main[ii] - 0.5 * box[3] - 0.5 * box[2]) ** 2)
|
# (cy_main[ii] - 0.5 * box[3] - 0.5 * box[2]) ** 2)
|
||||||
|
@@ -2644,13 +2636,11 @@ class Eynollah:
|
||||||
pcontained_in_box = ((boxes[:, 2] <= cy_main[ii]) & (cy_main[ii] < boxes[:, 3]) &
|
pcontained_in_box = ((boxes[:, 2] <= cy_main[ii]) & (cy_main[ii] < boxes[:, 3]) &
|
||||||
(boxes[:, 0] <= cx_main[ii]) & (cx_main[ii] < boxes[:, 1]))
|
(boxes[:, 0] <= cx_main[ii]) & (cx_main[ii] < boxes[:, 1]))
|
||||||
ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
|
ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
|
||||||
arg_text_con.append(ind_min)
|
arg_text_con_main[ii] = ind_min
|
||||||
args_contours = np.arange(len(arg_text_con))
|
args_contours_main = np.arange(len(contours_only_text_parent))
|
||||||
order_by_con_main = np.zeros(len(arg_text_con))
|
order_by_con_main = np.zeros_like(arg_text_con_main)
|
||||||
|
|
||||||
############################# head
|
arg_text_con_head = np.zeros(len(contours_only_text_parent_h), dtype=int)
|
||||||
|
|
||||||
arg_text_con_h = []
|
|
||||||
for ii in range(len(contours_only_text_parent_h)):
|
for ii in range(len(contours_only_text_parent_h)):
|
||||||
check_if_textregion_located_in_a_box = False
|
check_if_textregion_located_in_a_box = False
|
||||||
for jj, box in enumerate(boxes):
|
for jj, box in enumerate(boxes):
|
||||||
|
@@ -2659,7 +2649,7 @@ class Eynollah:
|
||||||
cy_head[ii] >= box[2] and
|
cy_head[ii] >= box[2] and
|
||||||
cy_head[ii] < box[3]):
|
cy_head[ii] < box[3]):
|
||||||
# this is valid if the center of region identify in which box it is located
|
# this is valid if the center of region identify in which box it is located
|
||||||
arg_text_con_h.append(jj)
|
arg_text_con_head[ii] = jj
|
||||||
check_if_textregion_located_in_a_box = True
|
check_if_textregion_located_in_a_box = True
|
||||||
break
|
break
|
||||||
if not check_if_textregion_located_in_a_box:
|
if not check_if_textregion_located_in_a_box:
|
||||||
|
@@ -2670,9 +2660,9 @@ class Eynollah:
|
||||||
pcontained_in_box = ((boxes[:, 2] <= cy_head[ii]) & (cy_head[ii] < boxes[:, 3]) &
|
pcontained_in_box = ((boxes[:, 2] <= cy_head[ii]) & (cy_head[ii] < boxes[:, 3]) &
|
||||||
(boxes[:, 0] <= cx_head[ii]) & (cx_head[ii] < boxes[:, 1]))
|
(boxes[:, 0] <= cx_head[ii]) & (cx_head[ii] < boxes[:, 1]))
|
||||||
ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
|
ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
|
||||||
arg_text_con_h.append(ind_min)
|
arg_text_con_head[ii] = ind_min
|
||||||
args_contours_h = np.arange(len(arg_text_con_h))
|
args_contours_head = np.arange(len(contours_only_text_parent_h))
|
||||||
order_by_con_head = np.zeros(len(arg_text_con_h))
|
order_by_con_head = np.zeros_like(arg_text_con_head)
|
||||||
|
|
||||||
ref_point = 0
|
ref_point = 0
|
||||||
order_of_texts_tot = []
|
order_of_texts_tot = []
|
||||||
|
@@ -2680,10 +2670,10 @@ class Eynollah:
|
||||||
for iij, box in enumerate(boxes):
|
for iij, box in enumerate(boxes):
|
||||||
ys = slice(*box[2:4])
|
ys = slice(*box[2:4])
|
||||||
xs = slice(*box[0:2])
|
xs = slice(*box[0:2])
|
||||||
args_contours_box = args_contours[np.array(arg_text_con) == iij]
|
args_contours_box_main = args_contours_main[arg_text_con_main == iij]
|
||||||
args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij]
|
args_contours_box_head = args_contours_head[arg_text_con_head == iij]
|
||||||
con_inter_box = contours_only_text_parent[args_contours_box]
|
con_inter_box = contours_only_text_parent[args_contours_box_main]
|
||||||
con_inter_box_h = contours_only_text_parent_h[args_contours_box_h]
|
con_inter_box_h = contours_only_text_parent_h[args_contours_box_head]
|
||||||
|
|
||||||
indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
||||||
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2])
|
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2])
|
||||||
|
@@ -2697,14 +2687,14 @@ class Eynollah:
|
||||||
indexes_sorted_head = indexes_sorted[kind_of_texts_sorted == 2]
|
indexes_sorted_head = indexes_sorted[kind_of_texts_sorted == 2]
|
||||||
indexes_by_type_head = index_by_kind_sorted[kind_of_texts_sorted == 2]
|
indexes_by_type_head = index_by_kind_sorted[kind_of_texts_sorted == 2]
|
||||||
|
|
||||||
for zahler, _ in enumerate(args_contours_box):
|
for zahler, _ in enumerate(args_contours_box_main):
|
||||||
arg_order_v = indexes_sorted_main[zahler]
|
arg_order_v = indexes_sorted_main[zahler]
|
||||||
order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \
|
order_by_con_main[args_contours_box_main[indexes_by_type_main[zahler]]] = \
|
||||||
np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
|
np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
|
||||||
|
|
||||||
for zahler, _ in enumerate(args_contours_box_h):
|
for zahler, _ in enumerate(args_contours_box_head):
|
||||||
arg_order_v = indexes_sorted_head[zahler]
|
arg_order_v = indexes_sorted_head[zahler]
|
||||||
order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \
|
order_by_con_head[args_contours_box_head[indexes_by_type_head[zahler]]] = \
|
||||||
np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
|
np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
|
||||||
|
|
||||||
for jji in range(len(id_of_texts)):
|
for jji in range(len(id_of_texts)):
|
||||||
|
@@ -2712,16 +2702,9 @@ class Eynollah:
|
||||||
id_of_texts_tot.append(id_of_texts[jji])
|
id_of_texts_tot.append(id_of_texts[jji])
|
||||||
ref_point += len(id_of_texts)
|
ref_point += len(id_of_texts)
|
||||||
|
|
||||||
order_of_texts_tot = []
|
order_of_texts_tot = np.concatenate((order_by_con_main,
|
||||||
for tj1 in range(len(contours_only_text_parent)):
|
order_by_con_head))
|
||||||
order_of_texts_tot.append(int(order_by_con_main[tj1]))
|
order_text_new = np.argsort(order_of_texts_tot)
|
||||||
|
|
||||||
for tj1 in range(len(contours_only_text_parent_h)):
|
|
||||||
order_of_texts_tot.append(int(order_by_con_head[tj1]))
|
|
||||||
|
|
||||||
order_text_new = []
|
|
||||||
for iii in range(len(order_of_texts_tot)):
|
|
||||||
order_text_new.append(np.flatnonzero(np.array(order_of_texts_tot) == iii))
|
|
||||||
|
|
||||||
self.logger.debug("exit do_order_of_regions_full_layout")
|
self.logger.debug("exit do_order_of_regions_full_layout")
|
||||||
return order_text_new, id_of_texts_tot
|
return order_text_new, id_of_texts_tot
|
||||||
|
@@ -2739,7 +2722,7 @@ class Eynollah:
|
||||||
contours_only_text_parent)
|
contours_only_text_parent)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
arg_text_con = []
|
arg_text_con_main = np.zeros(len(contours_only_text_parent), dtype=int)
|
||||||
for ii in range(len(contours_only_text_parent)):
|
for ii in range(len(contours_only_text_parent)):
|
||||||
check_if_textregion_located_in_a_box = False
|
check_if_textregion_located_in_a_box = False
|
||||||
for jj, box in enumerate(boxes):
|
for jj, box in enumerate(boxes):
|
||||||
|
@@ -2747,7 +2730,7 @@ class Eynollah:
|
||||||
Mx_main[ii] < box[1] and
|
Mx_main[ii] < box[1] and
|
||||||
my_main[ii] >= box[2] and
|
my_main[ii] >= box[2] and
|
||||||
My_main[ii] < box[3]):
|
My_main[ii] < box[3]):
|
||||||
arg_text_con.append(jj)
|
arg_text_con_main[ii] = jj
|
||||||
check_if_textregion_located_in_a_box = True
|
check_if_textregion_located_in_a_box = True
|
||||||
break
|
break
|
||||||
if not check_if_textregion_located_in_a_box:
|
if not check_if_textregion_located_in_a_box:
|
||||||
|
@@ -2758,9 +2741,9 @@ class Eynollah:
|
||||||
pcontained_in_box = ((boxes[:, 2] <= cy_main[ii]) & (cy_main[ii] < boxes[:, 3]) &
|
pcontained_in_box = ((boxes[:, 2] <= cy_main[ii]) & (cy_main[ii] < boxes[:, 3]) &
|
||||||
(boxes[:, 0] <= cx_main[ii]) & (cx_main[ii] < boxes[:, 1]))
|
(boxes[:, 0] <= cx_main[ii]) & (cx_main[ii] < boxes[:, 1]))
|
||||||
ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
|
ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
|
||||||
arg_text_con.append(ind_min)
|
arg_text_con_main[ii] = ind_min
|
||||||
args_contours = np.arange(len(arg_text_con))
|
args_contours_main = np.arange(len(contours_only_text_parent))
|
||||||
order_by_con_main = np.zeros(len(arg_text_con))
|
order_by_con_main = np.zeros_like(arg_text_con_main)
|
||||||
|
|
||||||
ref_point = 0
|
ref_point = 0
|
||||||
order_of_texts_tot = []
|
order_of_texts_tot = []
|
||||||
|
@@ -2768,8 +2751,8 @@ class Eynollah:
|
||||||
for iij, box in enumerate(boxes):
|
for iij, box in enumerate(boxes):
|
||||||
ys = slice(*box[2:4])
|
ys = slice(*box[2:4])
|
||||||
xs = slice(*box[0:2])
|
xs = slice(*box[0:2])
|
||||||
args_contours_box = args_contours[np.array(arg_text_con) == iij]
|
args_contours_box_main = args_contours_main[arg_text_con_main == iij]
|
||||||
con_inter_box = contours_only_text_parent[args_contours_box]
|
con_inter_box = contours_only_text_parent[args_contours_box_main]
|
||||||
con_inter_box_h = []
|
con_inter_box_h = []
|
||||||
|
|
||||||
indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
||||||
|
@@ -2782,9 +2765,9 @@ class Eynollah:
|
||||||
indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1]
|
indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1]
|
||||||
indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1]
|
indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1]
|
||||||
|
|
||||||
for zahler, _ in enumerate(args_contours_box):
|
for zahler, _ in enumerate(args_contours_box_main):
|
||||||
arg_order_v = indexes_sorted_main[zahler]
|
arg_order_v = indexes_sorted_main[zahler]
|
||||||
order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \
|
order_by_con_main[args_contours_box_main[indexes_by_type_main[zahler]]] = \
|
||||||
np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
|
np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
|
||||||
|
|
||||||
for jji, _ in enumerate(id_of_texts):
|
for jji, _ in enumerate(id_of_texts):
|
||||||
|
@@ -2792,17 +2775,12 @@ class Eynollah:
|
||||||
id_of_texts_tot.append(id_of_texts[jji])
|
id_of_texts_tot.append(id_of_texts[jji])
|
||||||
ref_point += len(id_of_texts)
|
ref_point += len(id_of_texts)
|
||||||
|
|
||||||
order_of_texts_tot = []
|
order_of_texts_tot = order_by_con_main
|
||||||
for tj1 in range(len(contours_only_text_parent)):
|
order_text_new = np.argsort(order_of_texts_tot)
|
||||||
order_of_texts_tot.append(int(order_by_con_main[tj1]))
|
|
||||||
|
|
||||||
order_text_new = []
|
|
||||||
for iii in range(len(order_of_texts_tot)):
|
|
||||||
order_text_new.append(np.flatnonzero(np.array(order_of_texts_tot) == iii))
|
|
||||||
|
|
||||||
except Exception as why:
|
except Exception as why:
|
||||||
self.logger.error(why)
|
self.logger.error(why)
|
||||||
arg_text_con = []
|
arg_text_con_main = np.zeros(len(contours_only_text_parent), dtype=int)
|
||||||
for ii in range(len(contours_only_text_parent)):
|
for ii in range(len(contours_only_text_parent)):
|
||||||
check_if_textregion_located_in_a_box = False
|
check_if_textregion_located_in_a_box = False
|
||||||
for jj, box in enumerate(boxes):
|
for jj, box in enumerate(boxes):
|
||||||
|
@@ -2811,7 +2789,7 @@ class Eynollah:
|
||||||
cy_main[ii] >= box[2] and
|
cy_main[ii] >= box[2] and
|
||||||
cy_main[ii] < box[3]):
|
cy_main[ii] < box[3]):
|
||||||
# this is valid if the center of region identify in which box it is located
|
# this is valid if the center of region identify in which box it is located
|
||||||
arg_text_con.append(jj)
|
arg_text_con_main[ii] = jj
|
||||||
check_if_textregion_located_in_a_box = True
|
check_if_textregion_located_in_a_box = True
|
||||||
break
|
break
|
||||||
if not check_if_textregion_located_in_a_box:
|
if not check_if_textregion_located_in_a_box:
|
||||||
|
@@ -2819,9 +2797,9 @@ class Eynollah:
|
||||||
pcontained_in_box = ((boxes[:, 2] <= cy_main[ii]) & (cy_main[ii] < boxes[:, 3]) &
|
pcontained_in_box = ((boxes[:, 2] <= cy_main[ii]) & (cy_main[ii] < boxes[:, 3]) &
|
||||||
(boxes[:, 0] <= cx_main[ii]) & (cx_main[ii] < boxes[:, 1]))
|
(boxes[:, 0] <= cx_main[ii]) & (cx_main[ii] < boxes[:, 1]))
|
||||||
ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
|
ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
|
||||||
arg_text_con[ii] = ind_min
|
arg_text_con_main[ii] = ind_min
|
||||||
args_contours = np.arange(len(contours_only_text_parent))
|
args_contours_main = np.arange(len(contours_only_text_parent))
|
||||||
order_by_con_main = np.zeros(len(arg_text_con))
|
order_by_con_main = np.zeros_like(arg_text_con_main)
|
||||||
|
|
||||||
ref_point = 0
|
ref_point = 0
|
||||||
order_of_texts_tot = []
|
order_of_texts_tot = []
|
||||||
|
@@ -2829,11 +2807,9 @@ class Eynollah:
|
||||||
for iij, box in enumerate(boxes):
|
for iij, box in enumerate(boxes):
|
||||||
ys = slice(*box[2:4])
|
ys = slice(*box[2:4])
|
||||||
xs = slice(*box[0:2])
|
xs = slice(*box[0:2])
|
||||||
args_contours_box = args_contours[np.array(arg_text_con) == iij]
|
args_contours_box_main = args_contours_main[arg_text_con_main == iij]
|
||||||
con_inter_box = []
|
con_inter_box = contours_only_text_parent[args_contours_box_main]
|
||||||
con_inter_box_h = []
|
con_inter_box_h = []
|
||||||
for i in range(len(args_contours_box)):
|
|
||||||
con_inter_box.append(contours_only_text_parent[args_contours_box[i]])
|
|
||||||
|
|
||||||
indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
||||||
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2])
|
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2])
|
||||||
|
@@ -2845,9 +2821,9 @@ class Eynollah:
|
||||||
indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1]
|
indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1]
|
||||||
indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1]
|
indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1]
|
||||||
|
|
||||||
for zahler, _ in enumerate(args_contours_box):
|
for zahler, _ in enumerate(args_contours_box_main):
|
||||||
arg_order_v = indexes_sorted_main[zahler]
|
arg_order_v = indexes_sorted_main[zahler]
|
||||||
order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \
|
order_by_con_main[args_contours_box_main[indexes_by_type_main[zahler]]] = \
|
||||||
np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
|
np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
|
||||||
|
|
||||||
for jji, _ in enumerate(id_of_texts):
|
for jji, _ in enumerate(id_of_texts):
|
||||||
|
@@ -2855,14 +2831,8 @@ class Eynollah:
|
||||||
id_of_texts_tot.append(id_of_texts[jji])
|
id_of_texts_tot.append(id_of_texts[jji])
|
||||||
ref_point += len(id_of_texts)
|
ref_point += len(id_of_texts)
|
||||||
|
|
||||||
order_of_texts_tot = []
|
order_of_texts_tot = order_by_con_main
|
||||||
|
order_text_new = np.argsort(order_of_texts_tot)
|
||||||
for tj1 in range(len(contours_only_text_parent)):
|
|
||||||
order_of_texts_tot.append(int(order_by_con_main[tj1]))
|
|
||||||
|
|
||||||
order_text_new = []
|
|
||||||
for iii in range(len(order_of_texts_tot)):
|
|
||||||
order_text_new.append(np.flatnonzero(np.array(order_of_texts_tot) == iii))
|
|
||||||
|
|
||||||
self.logger.debug("exit do_order_of_regions_no_full_layout")
|
self.logger.debug("exit do_order_of_regions_no_full_layout")
|
||||||
return order_text_new, id_of_texts_tot
|
return order_text_new, id_of_texts_tot
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue